Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
217 files changed, 19738 insertions, 6511 deletions
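Much of this import is mechanical modernization for LLVM 16: `llvm::Optional`/`None` replaced by `std::optional`/`std::nullopt`, `LLVM_FALLTHROUGH` replaced by the standard `[[fallthrough]]` attribute, and `getValueOr` replaced by `value_or` (see the AsmPrinter.cpp, DbgEntityHistoryCalculator.cpp, and DwarfCompileUnit.cpp hunks below). The following is a minimal sketch of the same idioms in plain C++17, not code from the diff; the function and names are hypothetical and exist only to illustrate the pattern.

```cpp
#include <optional>
#include <string>

// Hypothetical helper mirroring the migration pattern in this import:
// return std::nullopt where the old code returned llvm::None, and use
// the standard [[fallthrough]] attribute instead of LLVM_FALLTHROUGH.
static std::optional<std::string> describeKind(int Kind) {
  switch (Kind) {
  case 0:
    return std::nullopt;   // old code: return None;
  case 1:
    [[fallthrough]];       // old code: LLVM_FALLTHROUGH;
  case 2:
    return std::string("handled");
  default:
    return std::nullopt;
  }
}

int main() {
  // value_or() replaces Optional's getValueOr(), matching calls such as
  // NVPTXAddressSpace.value_or(...) in the DwarfCompileUnit.cpp hunks.
  return describeKind(1).value_or("none") == "handled" ? 0 : 1;
}
```

A related recurring change below is passing `llvm::Align` objects (e.g. `emitValueToAlignment(Align(4), 0)`) instead of raw integers, which moves the power-of-two invariant into the type rather than each call site.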
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp index f5dbaccfcad5..b9579441a0ba 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp @@ -319,8 +319,9 @@ static const Value *getNoopInput(const Value *V, NoopInput = Op; } else if (isa<TruncInst>(I) && TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { - DataBits = std::min((uint64_t)DataBits, - I->getType()->getPrimitiveSizeInBits().getFixedSize()); + DataBits = + std::min((uint64_t)DataBits, + I->getType()->getPrimitiveSizeInBits().getFixedValue()); NoopInput = Op; } else if (auto *CB = dyn_cast<CallBase>(I)) { const Value *ReturnedOp = CB->getReturnedArgOperand(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 1940f46232d3..82b5ccdc70ea 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -21,9 +21,7 @@ namespace llvm { -AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} - -void AIXException::markFunctionEnd() { endFragment(); } +AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {} void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym) { @@ -62,7 +60,7 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, const unsigned PointerSize = DL.getPointerSize(); // Add necessary paddings in 64 bit mode. - Asm->OutStreamer->emitValueToAlignment(PointerSize); + Asm->OutStreamer->emitValueToAlignment(Align(PointerSize)); // LSDA location. Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(LSDA, Asm->OutContext), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index e04a29fbb42b..de6ebcf0c341 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -19,7 +19,7 @@ #include "llvm/MC/MCStreamer.h" using namespace llvm; -ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} +ARMException::ARMException(AsmPrinter *A) : EHStreamer(A) {} ARMException::~ARMException() = default; @@ -48,6 +48,11 @@ void ARMException::beginFunction(const MachineFunction *MF) { } } +void ARMException::markFunctionEnd() { + if (shouldEmitCFI) + Asm->OutStreamer->emitCFIEndProc(); +} + /// endFunction - Gather and emit post-function exception information. 
/// void ARMException::endFunction(const MachineFunction *MF) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 9526bf7610b4..22ecc5199742 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -531,7 +531,7 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() { emitOffsets(EntryPool); emitAbbrevs(); emitData(); - Asm->OutStreamer->emitValueToAlignment(4, 0); + Asm->OutStreamer->emitValueToAlignment(Align(4), 0); Asm->OutStreamer->emitLabel(ContributionEnd); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 32a10ad41d1f..8c126d20fc9a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -119,6 +119,7 @@ #include <cstdint> #include <iterator> #include <memory> +#include <optional> #include <string> #include <utility> #include <vector> @@ -146,14 +147,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; -using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>; - -static gcp_map_type &getGCMap(void *&P) { - if (!P) - P = new gcp_map_type(); - return *(gcp_map_type*)P; -} - namespace { class AddrLabelMapCallbackPtr final : CallbackVH { AddrLabelMap *Map = nullptr; @@ -354,20 +347,16 @@ Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL, AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), - OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)) { + OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)), + SM(*this) { VerboseAsm = OutStreamer->isVerboseAsm(); + DwarfUsesRelocationsAcrossSections = + MAI->doesDwarfUseRelocationsAcrossSections(); } AsmPrinter::~AsmPrinter() { assert(!DD && Handlers.size() == NumUserHandlers && "Debug/EH info didn't get finalized"); - - if (GCMetadataPrinters) { - gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - - delete &GCMap; - GCMetadataPrinters = nullptr; - } } bool AsmPrinter::isPositionIndependent() const { @@ -489,7 +478,7 @@ bool AsmPrinter::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (const auto &I : *MI) - if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + if (GCMetadataPrinter *MP = getOrCreateGCPrinter(*I)) MP->beginAssembly(M, *MI, *this); // Emit module-level inline asm if it exists. @@ -529,7 +518,7 @@ bool AsmPrinter::doInitialization(Module &M) { switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: // We may want to emit CFI for debug. - LLVM_FALLTHROUGH; + [[fallthrough]]; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: @@ -553,7 +542,7 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::None: if (!needsCFIForDebug()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: ES = new DwarfCFIException(this); @@ -710,6 +699,16 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { // GV's or GVSym's attributes will be used for the EmittedSym. 
emitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); + if (GV->isTagged()) { + Triple T = TM.getTargetTriple(); + + if (T.getArch() != Triple::aarch64 || !T.isAndroid()) + OutContext.reportError(SMLoc(), + "Tagged symbols (-fsanitize=memtag-globals) are " + "only supported on aarch64 + Android."); + OutStreamer->emitSymbolAttribute(EmittedSym, MAI->getMemtagAttr()); + } + if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -742,10 +741,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isCommon()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. // .comm _foo, 42, 4 - const bool SupportsAlignment = - getObjFileLowering().getCommDirectiveSupportsAlignment(); - OutStreamer->emitCommonSymbol(GVSym, Size, - SupportsAlignment ? Alignment.value() : 0); + OutStreamer->emitCommonSymbol(GVSym, Size, Alignment); return; } @@ -760,7 +756,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { Size = 1; // zerofill of 0 bytes is undefined. emitLinkage(GV, GVSym); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment.value()); + OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment); return; } @@ -779,17 +775,14 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { // Prefer to simply fall back to .local / .comm in this case. if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 - OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment.value()); + OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment); return; } // .local _foo OutStreamer->emitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - const bool SupportsAlignment = - getObjFileLowering().getCommDirectiveSupportsAlignment(); - OutStreamer->emitCommonSymbol(GVSym, Size, - SupportsAlignment ? Alignment.value() : 0); + OutStreamer->emitCommonSymbol(GVSym, Size, Alignment); return; } @@ -810,7 +803,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); - OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); + OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment); } else if (GVKind.isThreadData()) { OutStreamer->switchSection(TheSection); @@ -941,6 +934,9 @@ void AsmPrinter::emitFunctionHeader() { } } + // Emit KCFI type information before patchable-function-prefix nops. + emitKCFITypeId(*MF); + // Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily // place prefix data before NOPs. unsigned PatchableFunctionPrefix = 0; @@ -1000,6 +996,11 @@ void AsmPrinter::emitFunctionHeader() { HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginFunction(MF); } + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->beginBasicBlockSection(MF->front()); + } // Emit the prologue data. 
if (F.hasPrologueData()) @@ -1039,8 +1040,13 @@ void AsmPrinter::emitFunctionEntryLabel() { if (TM.getTargetTriple().isOSBinFormatELF()) { MCSymbol *Sym = getSymbolPreferLocal(MF->getFunction()); - if (Sym != CurrentFnSym) + if (Sym != CurrentFnSym) { + cast<MCSymbolELF>(Sym)->setType(ELF::STT_FUNC); + CurrentFnBeginLocal = Sym; OutStreamer->emitLabel(Sym); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction); + } } } @@ -1053,7 +1059,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // We assume a single instruction only has a spill or reload, not // both. - Optional<unsigned> Size; + std::optional<unsigned> Size; if ((Size = MI.getRestoreSize(TII))) { CommentOS << *Size << "-byte Reload\n"; } else if ((Size = MI.getFoldedRestoreSize(TII))) { @@ -1128,10 +1134,15 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << " <- "; const DIExpression *Expr = MI->getDebugExpression(); + // First convert this to a non-variadic expression if possible, to simplify + // the output. + if (auto NonVariadicExpr = DIExpression::convertToNonVariadicExpression(Expr)) + Expr = *NonVariadicExpr; + // Then, output the possibly-simplified expression. if (Expr->getNumElements()) { OS << '['; ListSeparator LS; - for (auto Op : Expr->expr_ops()) { + for (auto &Op : Expr->expr_ops()) { OS << LS << dwarf::OperationEncodingString(Op.getOp()); for (unsigned I = 0; I < Op.getNumArgs(); ++I) OS << ' ' << Op.getArg(I); @@ -1170,14 +1181,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } case MachineOperand::MO_TargetIndex: { OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; - // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer->emitRawComment(OS.str()); break; } case MachineOperand::MO_Register: case MachineOperand::MO_FrameIndex: { Register Reg; - Optional<StackOffset> Offset; + std::optional<StackOffset> Offset; if (Op.isReg()) { Reg = Op.getReg(); } else { @@ -1328,7 +1337,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->pushSection(); OutStreamer->switchSection(BBAddrMapSection); OutStreamer->AddComment("version"); - OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion()); + uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion(); + OutStreamer->emitInt8(BBAddrMapVersion); OutStreamer->AddComment("feature"); OutStreamer->emitInt8(0); OutStreamer->AddComment("function address"); @@ -1340,18 +1350,49 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { for (const MachineBasicBlock &MBB : MF) { const MCSymbol *MBBSymbol = MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol(); + // TODO: Remove this check when version 1 is deprecated. + if (BBAddrMapVersion > 1) { + OutStreamer->AddComment("BB id"); + // Emit the BB ID for this basic block. + OutStreamer->emitULEB128IntValue(*MBB.getBBID()); + } // Emit the basic block offset relative to the end of the previous block. // This is zero unless the block is padded due to alignment. emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol); // Emit the basic block size. When BBs have alignments, their size cannot // always be computed from their offsets. emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); + // Emit the Metadata. 
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); PrevMBBEndSymbol = MBB.getEndSymbol(); } OutStreamer->popSection(); } +void AsmPrinter::emitKCFITrapEntry(const MachineFunction &MF, + const MCSymbol *Symbol) { + MCSection *Section = + getObjFileLowering().getKCFITrapSection(*MF.getSection()); + if (!Section) + return; + + OutStreamer->pushSection(); + OutStreamer->switchSection(Section); + + MCSymbol *Loc = OutContext.createLinkerPrivateTempSymbol(); + OutStreamer->emitLabel(Loc); + OutStreamer->emitAbsoluteSymbolDiff(Symbol, Loc, 4); + + OutStreamer->popSection(); +} + +void AsmPrinter::emitKCFITypeId(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + if (const MDNode *MD = F.getMetadata(LLVMContext::MD_kcfi_type)) + emitGlobalConstant(F.getParent()->getDataLayout(), + mdconst::extract<ConstantInt>(MD->getOperand(0))); +} + void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) { if (PP) { auto GUID = MI.getOperand(0).getImm(); @@ -1421,9 +1462,87 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) { *StackUsageStream << "static\n"; } -static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF) { +void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF, + const MDNode &MD) { + MCSymbol *S = MF.getContext().createTempSymbol("pcsection"); + OutStreamer->emitLabel(S); + PCSectionsSymbols[&MD].emplace_back(S); +} + +void AsmPrinter::emitPCSections(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + if (PCSectionsSymbols.empty() && !F.hasMetadata(LLVMContext::MD_pcsections)) + return; + + const CodeModel::Model CM = MF.getTarget().getCodeModel(); + const unsigned RelativeRelocSize = + (CM == CodeModel::Medium || CM == CodeModel::Large) ? getPointerSize() + : 4; + + // Switch to PCSection, short-circuiting the common case where the current + // section is still valid (assume most MD_pcsections contain just 1 section). + auto SwitchSection = [&, Prev = StringRef()](const StringRef &Sec) mutable { + if (Sec == Prev) + return; + MCSection *S = getObjFileLowering().getPCSection(Sec, MF.getSection()); + assert(S && "PC section is not initialized"); + OutStreamer->switchSection(S); + Prev = Sec; + }; + // Emit symbols into sections and data as specified in the pcsections MDNode. + auto EmitForMD = [&](const MDNode &MD, ArrayRef<const MCSymbol *> Syms, + bool Deltas) { + // Expect the first operand to be a section name. After that, a tuple of + // constants may appear, which will simply be emitted into the current + // section (the user of MD_pcsections decides the format of encoded data). + assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string"); + for (const MDOperand &MDO : MD.operands()) { + if (auto *S = dyn_cast<MDString>(MDO)) { + SwitchSection(S->getString()); + const MCSymbol *Prev = Syms.front(); + for (const MCSymbol *Sym : Syms) { + if (Sym == Prev || !Deltas) { + // Use the entry itself as the base of the relative offset. + MCSymbol *Base = MF.getContext().createTempSymbol("pcsection_base"); + OutStreamer->emitLabel(Base); + // Emit relative relocation `addr - base`, which avoids a dynamic + // relocation in the final binary. User will get the address with + // `base + addr`. 
+ emitLabelDifference(Sym, Base, RelativeRelocSize); + } else { + emitLabelDifference(Sym, Prev, 4); + } + Prev = Sym; + } + } else { + assert(isa<MDNode>(MDO) && "expecting either string or tuple"); + const auto *AuxMDs = cast<MDNode>(MDO); + for (const MDOperand &AuxMDO : AuxMDs->operands()) { + assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant"); + const auto *C = cast<ConstantAsMetadata>(AuxMDO); + emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue()); + } + } + } + }; + + OutStreamer->pushSection(); + // Emit PCs for function start and function size. + if (const MDNode *MD = F.getMetadata(LLVMContext::MD_pcsections)) + EmitForMD(*MD, {getFunctionBegin(), getFunctionEnd()}, true); + // Emit PCs for instructions collected. + for (const auto &MS : PCSectionsSymbols) + EmitForMD(*MS.first, MS.second, false); + OutStreamer->popSection(); + PCSectionsSymbols.clear(); +} + +/// Returns true if function begin and end labels should be emitted. +static bool needFuncLabels(const MachineFunction &MF) { MachineModuleInfo &MMI = MF.getMMI(); - if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo()) + if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || + MMI.hasDebugInfo() || + MF.getFunction().hasMetadata(LLVMContext::MD_pcsections)) return true; // We might emit an EH table that uses function begin and end labels even if @@ -1481,6 +1600,9 @@ void AsmPrinter::emitFunctionBody() { if (MCSymbol *S = MI.getPreInstrSymbol()) OutStreamer->emitLabel(S); + if (MDNode *MD = MI.getPCSections()) + emitPCSectionsLabel(*MF, *MD); + for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); @@ -1541,6 +1663,9 @@ void AsmPrinter::emitFunctionBody() { if (isVerbose()) OutStreamer->emitRawComment("ARITH_FENCE"); break; + case TargetOpcode::MEMBARRIER: + OutStreamer->emitRawComment("MEMBARRIER"); + break; default: emitInstruction(&MI); if (CanDoExtraAnalysis) { @@ -1666,8 +1791,11 @@ void AsmPrinter::emitFunctionBody() { // Emit target-specific gunk after the function body. emitFunctionBodyEnd(); - if (needFuncLabelsForEHOrDebugInfo(*MF) || - MAI->hasDotTypeDotSizeDirective()) { + // Even though wasm supports .type and .size in general, function symbols + // are automatically sized. + bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm(); + + if (needFuncLabels(*MF) || EmitFunctionSize) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->emitLabel(CurrentFnEnd); @@ -1675,15 +1803,26 @@ void AsmPrinter::emitFunctionBody() { // If the target wants a .size directive for the size of the function, emit // it. - if (MAI->hasDotTypeDotSizeDirective()) { + if (EmitFunctionSize) { // We can get the size as difference between the function label and the // temp label. const MCExpr *SizeExp = MCBinaryExpr::createSub( MCSymbolRefExpr::create(CurrentFnEnd, OutContext), MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext); OutStreamer->emitELFSize(CurrentFnSym, SizeExp); + if (CurrentFnBeginLocal) + OutStreamer->emitELFSize(CurrentFnBeginLocal, SizeExp); } + // Call endBasicBlockSection on the last block now, if it wasn't already + // called. 
+ if (!MF->back().isEndSection()) { + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endBasicBlockSection(MF->back()); + } + } for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); @@ -1708,6 +1847,9 @@ void AsmPrinter::emitFunctionBody() { if (MF->hasBBLabels() && HasAnyRealCode) emitBBAddrMapSection(*MF); + // Emit sections containing instruction and function PCs. + emitPCSections(*MF); + // Emit section containing stack size metadata. emitStackSizeSection(*MF); @@ -1909,8 +2051,8 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer(); - Optional<SmallString<128>> Filename; - if (Optional<StringRef> FilenameRef = RS.getFilename()) { + std::optional<SmallString<128>> Filename; + if (std::optional<StringRef> FilenameRef = RS.getFilename()) { Filename = *FilenameRef; sys::fs::make_absolute(*Filename); assert(!Filename->empty() && "The filename can't be empty."); @@ -2041,6 +2183,12 @@ bool AsmPrinter::doFinalization(Module &M) { if (auto *TS = OutStreamer->getTargetStreamer()) TS->emitConstantPools(); + // Emit Stack maps before any debug info. Mach-O requires that no data or + // text sections come after debug info has been emitted. This matters for + // stack maps as they are arbitrary data, and may even have a custom format + // through user plugins. + emitStackMaps(); + // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, @@ -2103,7 +2251,7 @@ bool AsmPrinter::doFinalization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) - if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I)) + if (GCMetadataPrinter *MP = getOrCreateGCPrinter(**--I)) MP->finishAssembly(M, *MI, *this); // Emit llvm.ident metadata in an '.ident' directive. @@ -2133,9 +2281,9 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit address-significance attributes for all globals. 
OutStreamer->emitAddrsig(); for (const GlobalValue &GV : M.global_values()) { - if (!GV.use_empty() && !GV.isTransitiveUsedByMetadataOnly() && - !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() && - !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) + if (!GV.use_empty() && !GV.isThreadLocal() && + !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && + !GV.hasAtLeastLocalUnnamedAddr()) OutStreamer->emitAddrsigSym(getSymbol(&GV)); } } @@ -2213,6 +2361,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnSymForSize = CurrentFnSym; CurrentFnBegin = nullptr; + CurrentFnBeginLocal = nullptr; CurrentSectionBeginSym = nullptr; MBBSectionRanges.clear(); MBBSectionExceptionSyms.clear(); @@ -2220,7 +2369,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { if (F.hasFnAttribute("patchable-function-entry") || F.hasFnAttribute("function-instrument") || F.hasFnAttribute("xray-instruction-threshold") || - needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize || + needFuncLabels(MF) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) @@ -2692,9 +2841,9 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV, STI = &getSubtargetInfo(); else STI = TM.getMCSubtargetInfo(); - OutStreamer->emitCodeAlignment(Alignment.value(), STI, MaxBytesToEmit); + OutStreamer->emitCodeAlignment(Alignment, STI, MaxBytesToEmit); } else - OutStreamer->emitValueToAlignment(Alignment.value(), 0, 1, MaxBytesToEmit); + OutStreamer->emitValueToAlignment(Alignment, 0, 1, MaxBytesToEmit); } //===----------------------------------------------------------------------===// @@ -2761,7 +2910,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // expression properly. This is important for differences between // blockaddress labels. Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - LLVM_FALLTHROUGH; + [[fallthrough]]; case Instruction::BitCast: return lowerConstant(CE->getOperand(0)); @@ -2791,8 +2940,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // // If the pointer is larger than the resultant integer, then // as with Trunc just depend on the assembler to truncate it. - if (DL.getTypeAllocSize(Ty).getFixedSize() <= - DL.getTypeAllocSize(Op->getType()).getFixedSize()) + if (DL.getTypeAllocSize(Ty).getFixedValue() <= + DL.getTypeAllocSize(Op->getType()).getFixedValue()) return OpExpr; break; // Error @@ -3526,11 +3675,6 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { } } - // Emit an alignment directive for this block, if needed. - const Align Alignment = MBB.getAlignment(); - if (Alignment != Align(1)) - emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment()); - // Switch to a new section if this basic block must begin a section. The // entry block is always placed in the function section and is handled // separately. @@ -3541,25 +3685,30 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { CurrentSectionBeginSym = MBB.getSymbol(); } + // Emit an alignment directive for this block, if needed. + const Align Alignment = MBB.getAlignment(); + if (Alignment != Align(1)) + emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment()); + // If the block has its address taken, emit any labels that were used to // reference the block. 
It is possible that there is more than one label // here, because multiple LLVM BB's may have been RAUW'd to this block after // the references were generated. - const BasicBlock *BB = MBB.getBasicBlock(); - if (MBB.hasAddressTaken()) { + if (MBB.isIRBlockAddressTaken()) { if (isVerbose()) OutStreamer->AddComment("Block address taken"); - // MBBs can have their address taken as part of CodeGen without having - // their corresponding BB's address taken in IR - if (BB && BB->hasAddressTaken()) - for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB)) - OutStreamer->emitLabel(Sym); + BasicBlock *BB = MBB.getAddressTakenIRBlock(); + assert(BB && BB->hasAddressTaken() && "Missing BB"); + for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB)) + OutStreamer->emitLabel(Sym); + } else if (isVerbose() && MBB.isMachineBlockAddressTaken()) { + OutStreamer->AddComment("Block address taken"); } // Print some verbose block comments. if (isVerbose()) { - if (BB) { + if (const BasicBlock *BB = MBB.getBasicBlock()) { if (BB->hasName()) { BB->printAsOperand(OutStreamer->getCommentOS(), /*PrintType=*/false, BB->getModule()); @@ -3590,11 +3739,11 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { } // With BB sections, each basic block must handle CFI information on its own - // if it begins a section (Entry block is handled separately by - // AsmPrinterHandler::beginFunction). + // if it begins a section (Entry block call is handled separately, next to + // beginFunction). if (MBB.isBeginSection() && !MBB.isEntryBlock()) for (const HandlerInfo &HI : Handlers) - HI.Handler->beginBasicBlock(MBB); + HI.Handler->beginBasicBlockSection(MBB); } void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { @@ -3602,7 +3751,7 @@ void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { // sections. 
if (MBB.isEndSection()) for (const HandlerInfo &HI : Handlers) - HI.Handler->endBasicBlock(MBB); + HI.Handler->endBasicBlockSection(MBB); } void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility, @@ -3684,13 +3833,12 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return true; } -GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { +GCMetadataPrinter *AsmPrinter::getOrCreateGCPrinter(GCStrategy &S) { if (!S.usesMetadata()) return nullptr; - gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - gcp_map_type::iterator GCPI = GCMap.find(&S); - if (GCPI != GCMap.end()) + auto [GCPI, Inserted] = GCMetadataPrinters.insert({&S, nullptr}); + if (!Inserted) return GCPI->second.get(); auto Name = S.getName(); @@ -3700,14 +3848,14 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { if (Name == GCMetaPrinter.getName()) { std::unique_ptr<GCMetadataPrinter> GMP = GCMetaPrinter.instantiate(); GMP->S = &S; - auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); - return IterBool.first->second.get(); + GCPI->second = std::move(GMP); + return GCPI->second.get(); } report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); } -void AsmPrinter::emitStackMaps(StackMaps &SM) { +void AsmPrinter::emitStackMaps() { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); bool NeedsDefault = false; @@ -3716,7 +3864,7 @@ void AsmPrinter::emitStackMaps(StackMaps &SM) { NeedsDefault = true; else for (const auto &I : *MI) { - if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + if (GCMetadataPrinter *MP = getOrCreateGCPrinter(*I)) if (MP->emitStackMaps(SM, *this)) continue; // The strategy doesn't have printer or doesn't emit custom stack maps. @@ -3818,7 +3966,8 @@ void AsmPrinter::emitXRayTable() { // pointers. This should work for both 32-bit and 64-bit platforms. if (FnSledIndex) { OutStreamer->switchSection(FnSledIndex); - OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(2 * WordSizeBytes), + &getSubtargetInfo()); OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); OutStreamer->switchSection(PrevSection); @@ -3894,7 +4043,7 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const { dwarf::FormParams AsmPrinter::getDwarfFormParams() const { return {getDwarfVersion(), uint8_t(getPointerSize()), OutStreamer->getContext().getDwarfFormat(), - MAI->doesDwarfUseRelocationsAcrossSections()}; + doesDwarfUseRelocationsAcrossSections()}; } unsigned int AsmPrinter::getUnitLengthFieldByteSize() const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index bfa53f5b9374..ecaa64afab4d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -163,7 +163,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, } // If the format uses relocations with dwarf, refer to the symbol directly. 
- if (MAI->doesDwarfUseRelocationsAcrossSections()) { + if (doesDwarfUseRelocationsAcrossSections()) { OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize()); return; } @@ -175,7 +175,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, } void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { - if (MAI->doesDwarfUseRelocationsAcrossSections()) { + if (doesDwarfUseRelocationsAcrossSections()) { assert(S.Symbol && "No symbol available"); emitDwarfSymbolReference(S.Symbol); return; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 88c82cbc958b..c1588aaea05e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -330,16 +330,8 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); - // Count the number of register definitions to find the asm string. - unsigned NumDefs = 0; - for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); - ++NumDefs) - assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); - - assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); - // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. - const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. @@ -411,6 +403,14 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { LocCookie, Msg, DiagnosticSeverity::DS_Warning)); MMI->getModule()->getContext().diagnose( DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note)); + + for (const Register RR : RestrRegs) { + if (std::optional<std::string> reason = + TRI->explainReservedReg(*MF, RR)) { + MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm( + LocCookie, *reason, DiagnosticSeverity::DS_Note)); + } + } } emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, @@ -480,7 +480,7 @@ bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, PrintAsmMemoryOperand(MI, OpNo, nullptr, O); return false; } - LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates. + [[fallthrough]]; // GCC allows '%a' to behave like '%c' with immediates. 
case 'c': // Substitute immediate value without immediate syntax if (MO.isImm()) { O << MO.getImm(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 701c0affdfa6..0a67c4b6beb6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -12,8 +12,6 @@ #include "CodeViewDebug.h" #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -560,7 +558,7 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, } void CodeViewDebug::emitCodeViewMagicVersion() { - OS.emitValueToAlignment(4); + OS.emitValueToAlignment(Align(4)); OS.AddComment("Debug section magic"); OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } @@ -730,7 +728,7 @@ void CodeViewDebug::emitTypeInformation() { TypeRecordMapping typeMapping(CVMCOS); Pipeline.addCallbackToPipeline(typeMapping); - Optional<TypeIndex> B = Table.getFirst(); + std::optional<TypeIndex> B = Table.getFirst(); while (B) { // This will fail if the record data is invalid. CVType Record = Table.getType(*B); @@ -754,13 +752,13 @@ void CodeViewDebug::emitTypeGlobalHashes() { // hardcoded to version 0, SHA1. OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); - OS.emitValueToAlignment(4); + OS.emitValueToAlignment(Align(4)); OS.AddComment("Magic"); OS.emitInt32(COFF::DEBUG_HASHES_SECTION_MAGIC); OS.AddComment("Section Version"); OS.emitInt16(0); OS.AddComment("Hash Algorithm"); - OS.emitInt16(uint16_t(GlobalTypeHashAlg::SHA1_8)); + OS.emitInt16(uint16_t(GlobalTypeHashAlg::BLAKE3)); TypeIndex TI(TypeIndex::FirstNonSimpleIndex); for (const auto &GHR : TypeTable.hashes()) { @@ -908,6 +906,9 @@ static std::string flattenCommandLine(ArrayRef<std::string> Args, } if (Arg.startswith("-object-file-name") || Arg == MainFilename) continue; + // Skip fmessage-length for reproduciability. + if (Arg.startswith("-fmessage-length")) + continue; if (PrintedOneArg) OS << " "; llvm::sys::printArg(OS, Arg, /*Quote=*/true); @@ -1337,10 +1338,20 @@ void CodeViewDebug::calculateRanges( assert(DVInst->isDebugValue() && "Invalid History entry"); // FIXME: Find a way to represent constant variables, since they are // relatively common. - Optional<DbgVariableLocation> Location = + std::optional<DbgVariableLocation> Location = DbgVariableLocation::extractFromMachineInstruction(*DVInst); if (!Location) + { + // When we don't have a location this is usually because LLVM has + // transformed it into a constant and we only have an llvm.dbg.value. We + // can't represent these well in CodeView since S_LOCAL only works on + // registers and memory locations. Instead, we will pretend this to be a + // constant value to at least have it show up in the debugger. + auto Op = DVInst->getDebugOperand(0); + if (Op.isImm()) + Var.ConstantValue = APSInt(APInt(64, Op.getImm()), false); continue; + } // CodeView can only express variables in register and variables in memory // at a constant offset from a register. 
However, for variables passed @@ -1498,8 +1509,16 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { FPO |= FrameProcedureOptions::MarkedInline; if (GV.hasFnAttribute(Attribute::Naked)) FPO |= FrameProcedureOptions::Naked; - if (MFI.hasStackProtectorIndex()) + if (MFI.hasStackProtectorIndex()) { FPO |= FrameProcedureOptions::SecurityChecks; + if (GV.hasFnAttribute(Attribute::StackProtectStrong) || + GV.hasFnAttribute(Attribute::StackProtectReq)) { + FPO |= FrameProcedureOptions::StrictSecurityChecks; + } + } else if (!GV.hasStackProtectorFnAttr()) { + // __declspec(safebuffers) disables stack guards. + FPO |= FrameProcedureOptions::SafeBuffers; + } FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U); FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U); if (Asm->TM.getOptLevel() != CodeGenOpt::None && @@ -1620,7 +1639,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { case dwarf::DW_TAG_pointer_type: if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type") return lowerTypeVFTableShape(cast<DIDerivedType>(Ty)); - LLVM_FALLTHROUGH; + [[fallthrough]]; case dwarf::DW_TAG_reference_type: case dwarf::DW_TAG_rvalue_reference_type: return lowerTypePointer(cast<DIDerivedType>(Ty)); @@ -2023,9 +2042,9 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { ReturnAndArgTypeIndices.back() = TypeIndex::None(); } TypeIndex ReturnTypeIndex = TypeIndex::Void(); - ArrayRef<TypeIndex> ArgTypeIndices = None; + ArrayRef<TypeIndex> ArgTypeIndices = std::nullopt; if (!ReturnAndArgTypeIndices.empty()) { - auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices); + auto ReturnAndArgTypesRef = ArrayRef(ReturnAndArgTypeIndices); ReturnTypeIndex = ReturnAndArgTypesRef.front(); ArgTypeIndices = ReturnAndArgTypesRef.drop_front(); } @@ -2777,9 +2796,19 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI, emitLocalVariable(FI, *L); // Next emit all non-parameters in the order that we found them. - for (const LocalVariable &L : Locals) - if (!L.DIVar->isParameter()) - emitLocalVariable(FI, L); + for (const LocalVariable &L : Locals) { + if (!L.DIVar->isParameter()) { + if (L.ConstantValue) { + // If ConstantValue is set we will emit it as a S_CONSTANT instead of a + // S_LOCAL in order to be able to represent it at all. + const DIType *Ty = L.DIVar->getType(); + APSInt Val(*L.ConstantValue); + emitConstantSymbolRecord(Ty, Val, std::string(L.DIVar->getName())); + } else { + emitLocalVariable(FI, L); + } + } + } } void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, @@ -3098,7 +3127,7 @@ MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) { void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) { OS.emitLabel(EndLabel); // Every subsection must be aligned to a 4-byte boundary. - OS.emitValueToAlignment(4); + OS.emitValueToAlignment(Align(4)); } static StringRef getSymbolName(SymbolKind SymKind) { @@ -3125,7 +3154,7 @@ void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) { // an extra copy of every symbol record in LLD. This increases object file // size by less than 1% in the clang build, and is compatible with the Visual // C++ linker. 
- OS.emitValueToAlignment(4); + OS.emitValueToAlignment(Align(4)); OS.emitLabel(SymEnd); } @@ -3350,11 +3379,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( DIGV->getRawStaticDataMemberDeclaration())) Scope = MemberDecl->getScope(); - // For Fortran, the scoping portion is elided in its name so that we can - // reference the variable in the command line of the VS debugger. + // For static local variables and Fortran, the scoping portion is elided + // in its name so that we can reference the variable in the command line + // of the VS debugger. std::string QualifiedName = - (moduleIsInFortran()) ? std::string(DIGV->getName()) - : getFullyQualifiedName(Scope, DIGV->getName()); + (moduleIsInFortran() || (Scope && isa<DILocalScope>(Scope))) + ? std::string(DIGV->getName()) + : getFullyQualifiedName(Scope, DIGV->getName()); if (const GlobalVariable *GV = CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 16f0082723ed..495822a6e653 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -82,7 +82,7 @@ public: } }; - static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), ""); + static_assert(sizeof(uint64_t) == sizeof(LocalVarDef)); private: MCStreamer &OS; @@ -104,6 +104,7 @@ private: SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>> DefRanges; bool UseReferenceType = false; + std::optional<APSInt> ConstantValue; }; struct CVGlobalVariable { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 617ddbd66e4e..308d4b1b5d61 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -425,7 +425,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { /// unsigned DIEInteger::sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const { - if (Optional<uint8_t> FixedSize = + if (std::optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, FormParams)) return *FixedSize; @@ -580,7 +580,7 @@ void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { DIEInteger(S.getIndex()).emitValue(AP, Form); return; case dwarf::DW_FORM_strp: - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + if (AP->doesDwarfUseRelocationsAcrossSections()) DIELabel(S.getSymbol()).emitValue(AP, Form); else DIEInteger(S.getOffset()).emitValue(AP, Form); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 1d546e5fd72e..08ed78eb20a1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -42,7 +42,7 @@ static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { void DIEHash::addString(StringRef Str) { LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); Hash.update(Str); - Hash.update(makeArrayRef((uint8_t)'\0')); + Hash.update(ArrayRef((uint8_t)'\0')); } // FIXME: The LEB128 routines are copied and only slightly modified out of @@ -389,7 +389,7 @@ void DIEHash::computeHash(const DIE &Die) { } // Following the last (or if there are no children), append a zero byte. 
- Hash.update(makeArrayRef((uint8_t)'\0')); + Hash.update(ArrayRef((uint8_t)'\0')); } /// This is based on the type signature computation given in section 7.27 of the diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index dabbfb45f687..0b40cdb0c3cc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -26,6 +25,7 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <map> +#include <optional> #include <utility> using namespace llvm; @@ -76,7 +76,7 @@ bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var, auto &Entries = VarEntries[Var]; if (!Entries.empty() && Entries.back().isDbgValue() && !Entries.back().isClosed() && - Entries.back().getInstr()->isIdenticalTo(MI)) { + Entries.back().getInstr()->isEquivalentDbgInstr(MI)) { LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" << "\t" << Entries.back().getInstr() << "\t" << MI << "\n"); @@ -110,20 +110,20 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) { /// range in Ranges. EndMI can be nullptr to indicate that the range is /// unbounded. Assumes Ranges is ordered and disjoint. Returns true and points /// to the first intersecting scope range if one exists. -static Optional<ArrayRef<InsnRange>::iterator> +static std::optional<ArrayRef<InsnRange>::iterator> intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef<InsnRange> &Ranges, const InstructionOrdering &Ordering) { for (auto RangesI = Ranges.begin(), RangesE = Ranges.end(); RangesI != RangesE; ++RangesI) { if (EndMI && Ordering.isBefore(EndMI, RangesI->first)) - return None; + return std::nullopt; if (EndMI && !Ordering.isBefore(RangesI->second, EndMI)) return RangesI; if (Ordering.isBefore(StartMI, RangesI->second)) return RangesI; } - return None; + return std::nullopt; } void DbgValueHistoryMap::trimLocationRanges( @@ -264,7 +264,7 @@ bool DbgValueHistoryMap::hasNonEmptyLocation(const Entries &Entries) const { const MachineInstr *MI = Entry.getInstr(); assert(MI->isDebugValue()); // A DBG_VALUE $noreg is an empty variable location - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) + if (MI->isUndefDebugValue()) continue; return true; @@ -495,7 +495,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. 
- if (Register::isVirtualRegister(MO.getReg())) + if (MO.getReg().isVirtual()) clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries, MI); // If this is a register def operand, it may end a debug value diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 8ebbed974abb..858a3e75e515 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -30,15 +29,15 @@ using namespace llvm; /// variable's lexical scope instruction ranges. static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true)); -Optional<DbgVariableLocation> +std::optional<DbgVariableLocation> DbgVariableLocation::extractFromMachineInstruction( const MachineInstr &Instruction) { DbgVariableLocation Location; // Variables calculated from multiple locations can't be represented here. if (Instruction.getNumDebugOperands() != 1) - return None; + return std::nullopt; if (!Instruction.getDebugOperand(0).isReg()) - return None; + return std::nullopt; Location.Register = Instruction.getDebugOperand(0).getReg(); Location.FragmentInfo.reset(); // We only handle expressions generated by DIExpression::appendOffset, @@ -53,7 +52,7 @@ DbgVariableLocation::extractFromMachineInstruction( Op->getOp() == dwarf::DW_OP_LLVM_arg) ++Op; else - return None; + return std::nullopt; } while (Op != DIExpr->expr_op_end()) { switch (Op->getOp()) { @@ -84,7 +83,7 @@ DbgVariableLocation::extractFromMachineInstruction( Offset = 0; break; default: - return None; + return std::nullopt; } ++Op; } @@ -416,16 +415,12 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) { InstOrdering.clear(); } -void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) { - if (!MBB.isBeginSection()) - return; - - PrevLabel = MBB.getSymbol(); +void DebugHandlerBase::beginBasicBlockSection(const MachineBasicBlock &MBB) { + EpilogBeginBlock = nullptr; + if (!MBB.isEntryBlock()) + PrevLabel = MBB.getSymbol(); } -void DebugHandlerBase::endBasicBlock(const MachineBasicBlock &MBB) { - if (!MBB.isEndSection()) - return; - +void DebugHandlerBase::endBasicBlockSection(const MachineBasicBlock &MBB) { PrevLabel = nullptr; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index d7ab2091967f..2008aa39ff87 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -76,6 +76,9 @@ public: : EntryKind(E_TargetIndexLocation), TIL(Loc) {} bool isLocation() const { return EntryKind == E_Location; } + bool isIndirectLocation() const { + return EntryKind == E_Location && Loc.isIndirect(); + } bool isTargetIndexLocation() const { return EntryKind == E_TargetIndexLocation; } @@ -116,13 +119,7 @@ class DbgValueLoc { public: DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs) : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()), - IsVariadic(true) { -#ifndef NDEBUG - // Currently, DBG_VALUE_VAR expressions must use stack_value. 
- assert(Expr && Expr->isValid() && - is_contained(Locs, dwarf::DW_OP_stack_value)); -#endif - } + IsVariadic(true) {} DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs, bool IsVariadic) @@ -133,10 +130,6 @@ public: !any_of(Locs, [](auto LE) { return LE.isLocation(); })); if (!IsVariadic) { assert(ValueLocEntries.size() == 1); - } else { - // Currently, DBG_VALUE_VAR expressions must use stack_value. - assert(Expr && Expr->isValid() && - is_contained(Expr->getElements(), dwarf::DW_OP_stack_value)); } #endif } @@ -150,10 +143,31 @@ public: bool isFragment() const { return getExpression()->isFragment(); } bool isEntryVal() const { return getExpression()->isEntryValue(); } bool isVariadic() const { return IsVariadic; } - const DIExpression *getExpression() const { return Expression; } - const ArrayRef<DbgValueLocEntry> getLocEntries() const { - return ValueLocEntries; + bool isEquivalent(const DbgValueLoc &Other) const { + // Cannot be equivalent with different numbers of entries. + if (ValueLocEntries.size() != Other.ValueLocEntries.size()) + return false; + bool ThisIsIndirect = + !IsVariadic && ValueLocEntries[0].isIndirectLocation(); + bool OtherIsIndirect = + !Other.IsVariadic && Other.ValueLocEntries[0].isIndirectLocation(); + // Check equivalence of DIExpressions + Directness together. + if (!DIExpression::isEqualExpression(Expression, ThisIsIndirect, + Other.Expression, OtherIsIndirect)) + return false; + // Indirectness should have been accounted for in the above check, so just + // compare register values directly here. + if (ThisIsIndirect || OtherIsIndirect) { + DbgValueLocEntry ThisOp = ValueLocEntries[0]; + DbgValueLocEntry OtherOp = Other.ValueLocEntries[0]; + return ThisOp.isLocation() && OtherOp.isLocation() && + ThisOp.getLoc().getReg() == OtherOp.getLoc().getReg(); + } + // If neither are indirect, then just compare the loc entries directly. + return ValueLocEntries == Other.ValueLocEntries; } + const DIExpression *getExpression() const { return Expression; } + ArrayRef<DbgValueLocEntry> getLocEntries() const { return ValueLocEntries; } friend bool operator==(const DbgValueLoc &, const DbgValueLoc &); friend bool operator<(const DbgValueLoc &, const DbgValueLoc &); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -193,11 +207,15 @@ public: /// Entry. bool MergeRanges(const DebugLocEntry &Next) { // If this and Next are describing the same variable, merge them. - if ((End == Next.Begin && Values == Next.Values)) { - End = Next.End; - return true; - } - return false; + if (End != Next.Begin) + return false; + if (Values.size() != Next.Values.size()) + return false; + for (unsigned EntryIdx = 0; EntryIdx < Values.size(); ++EntryIdx) + if (!Values[EntryIdx].isEquivalent(Next.Values[EntryIdx])) + return false; + End = Next.End; + return true; } const MCSymbol *getBeginSym() const { return Begin; } @@ -214,6 +232,11 @@ public: // Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. void sortUniqueValues() { + // Values is either 1 item that does not have a fragment, or many items + // that all do. No need to sort if the former and also prevents operator< + // being called on a non fragment item when _GLIBCXX_DEBUG is defined. 
+ if (Values.size() == 1) + return; llvm::sort(Values); Values.erase(std::unique(Values.begin(), Values.end(), [](const DbgValueLoc &A, const DbgValueLoc &B) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h index 10019a4720e6..0515173b4a24 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -109,19 +109,18 @@ public: ArrayRef<Entry> getEntries(const List &L) const { size_t LI = getIndex(L); - return makeArrayRef(Entries) - .slice(Lists[LI].EntryOffset, getNumEntries(LI)); + return ArrayRef(Entries).slice(Lists[LI].EntryOffset, getNumEntries(LI)); } ArrayRef<char> getBytes(const Entry &E) const { size_t EI = getIndex(E); - return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end()) + return ArrayRef(DWARFBytes.begin(), DWARFBytes.end()) .slice(Entries[EI].ByteOffset, getNumBytes(EI)); } ArrayRef<std::string> getComments(const Entry &E) const { size_t EI = getIndex(E); - return makeArrayRef(Comments) - .slice(Entries[EI].CommentOffset, getNumComments(EI)); + return ArrayRef(Comments).slice(Entries[EI].CommentOffset, + getNumComments(EI)); } private: @@ -159,13 +158,13 @@ class DebugLocStream::ListBuilder { DbgVariable &V; const MachineInstr &MI; size_t ListIndex; - Optional<uint8_t> TagOffset; + std::optional<uint8_t> TagOffset; public: ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, DbgVariable &V, const MachineInstr &MI) : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)), - TagOffset(None) {} + TagOffset(std::nullopt) {} void setTagOffset(uint8_t TO) { TagOffset = TO; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 5f187acf13dc..df4fe8d49806 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -23,28 +23,15 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) : EHStreamer(A) {} +DwarfCFIException::DwarfCFIException(AsmPrinter *A) : EHStreamer(A) {} -void DwarfCFIExceptionBase::markFunctionEnd() { - endFragment(); - - // Map all labels and get rid of any dead landing pads. - if (!Asm->MF->getLandingPads().empty()) { - MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF); - NonConstMF->tidyLandingPads(); - } -} +DwarfCFIException::~DwarfCFIException() = default; -void DwarfCFIExceptionBase::endFragment() { - if (shouldEmitCFI && !Asm->MF->hasBBSections()) - Asm->OutStreamer->emitCFIEndProc(); +void DwarfCFIException::addPersonality(const GlobalValue *Personality) { + if (!llvm::is_contained(Personalities, Personality)) + Personalities.push_back(Personality); } -DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : DwarfCFIExceptionBase(A) {} - -DwarfCFIException::~DwarfCFIException() = default; - /// endModule - Emit all exception information that should come after the /// content. 
void DwarfCFIException::endModule() { @@ -59,18 +46,12 @@ void DwarfCFIException::endModule() { if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect) return; - // Emit references to all used personality functions - for (const Function *Personality : MMI->getPersonalities()) { - if (!Personality) - continue; + // Emit indirect reference table for all used personality functions + for (const GlobalValue *Personality : Personalities) { MCSymbol *Sym = Asm->getSymbol(Personality); TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym); } -} - -static MCSymbol *getExceptionSym(AsmPrinter *Asm, - const MachineBasicBlock *MBB) { - return Asm->getMBBExceptionSym(*MBB); + Personalities.clear(); } void DwarfCFIException::beginFunction(const MachineFunction *MF) { @@ -86,9 +67,9 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = nullptr; + const GlobalValue *Per = nullptr; if (F.hasPersonalityFn()) - Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + Per = dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts()); // Emit a personality function even when there are no landing pads forceEmitPersonality = @@ -114,12 +95,9 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves); else shouldEmitCFI = Asm->needsCFIForDebug() && shouldEmitMoves; - - beginFragment(&*MF->begin(), getExceptionSym); } -void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, - ExceptionSymbolProvider ESP) { +void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) { if (!shouldEmitCFI) return; @@ -141,14 +119,11 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!shouldEmitPersonality) return; - auto &F = MBB->getParent()->getFunction(); - auto *P = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + auto &F = MBB.getParent()->getFunction(); + auto *P = dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts()); assert(P && "Expected personality function"); - - // If we are forced to emit this personality, make sure to record - // it because it might not appear in any landingpad - if (forceEmitPersonality) - MMI->addPersonality(P); + // Record the personality function. + addPersonality(P); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); @@ -157,7 +132,13 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, // Provide LSDA information. if (shouldEmitLSDA) - Asm->OutStreamer->emitCFILsda(ESP(Asm, MBB), TLOF.getLSDAEncoding()); + Asm->OutStreamer->emitCFILsda(Asm->getMBBExceptionSym(MBB), + TLOF.getLSDAEncoding()); +} + +void DwarfCFIException::endBasicBlockSection(const MachineBasicBlock &MBB) { + if (shouldEmitCFI) + Asm->OutStreamer->emitCFIEndProc(); } /// endFunction - Gather and emit post-function exception information. 
@@ -168,12 +149,3 @@ void DwarfCFIException::endFunction(const MachineFunction *MF) { emitExceptionTable(); } - -void DwarfCFIException::beginBasicBlock(const MachineBasicBlock &MBB) { - beginFragment(&MBB, getExceptionSym); -} - -void DwarfCFIException::endBasicBlock(const MachineBasicBlock &MBB) { - if (shouldEmitCFI) - Asm->OutStreamer->emitCFIEndProc(); -} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index b26960cdebb8..6dde50375a60 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -13,7 +13,6 @@ #include "DwarfCompileUnit.h" #include "AddressPool.h" #include "DwarfExpression.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -36,6 +35,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <iterator> +#include <optional> #include <string> #include <utility> @@ -121,8 +121,8 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { // extend .file to support this. unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID(); if (!File) - return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None, - CUID); + return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", std::nullopt, + std::nullopt, CUID); if (LastFile != File) { LastFile = File; @@ -203,7 +203,7 @@ void DwarfCompileUnit::addLocationAttribute( DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) { bool addToAccelTable = false; DIELoc *Loc = nullptr; - Optional<unsigned> NVPTXAddressSpace; + std::optional<unsigned> NVPTXAddressSpace; std::unique_ptr<DIEDwarfExpression> DwarfExpr; for (const auto &GE : GlobalExprs) { const GlobalVariable *Global = GE.Var; @@ -340,7 +340,7 @@ void DwarfCompileUnit::addLocationAttribute( // correctly interpret address space of the variable address. const unsigned NVPTX_ADDR_global_space = 5; addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, - NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space); + NVPTXAddressSpace.value_or(NVPTX_ADDR_global_space)); } if (Loc) addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); @@ -445,7 +445,12 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, // scope then create and insert DIEs for these variables. DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); + auto *ContextCU = static_cast<DwarfCompileUnit *>(SPDie->getUnit()); + return ContextCU->updateSubprogramScopeDIEImpl(SP, SPDie); +} +DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP, + DIE *SPDie) { SmallVector<RangeSpan, 2> BB_List; // If basic block sections are on, ranges for each basic block section has // to be emitted separately. @@ -547,11 +552,8 @@ void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, // Emit inlined subprograms. 
if (Scope->getParent() && isa<DISubprogram>(DS)) { - DIE *ScopeDIE = constructInlinedScopeDIE(Scope); - if (!ScopeDIE) - return; - - ParentScopeDIE.addChild(ScopeDIE); + DIE *ScopeDIE = constructInlinedScopeDIE(Scope, ParentScopeDIE); + assert(ScopeDIE && "Scope DIE should not be null."); createAndAddScopeChildren(Scope, *ScopeDIE); return; } @@ -650,9 +652,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( attachRangesOrLowHighPC(Die, std::move(List)); } -// This scope represents inlined body of a function. Construct DIE to -// represent this concrete inlined copy of the function. -DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { +DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope, + DIE &ParentScopeDIE) { assert(Scope->getScopeNode()); auto *DS = Scope->getScopeNode(); auto *InlinedSP = getDISubprogram(DS); @@ -662,19 +663,20 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); + ParentScopeDIE.addChild(ScopeDIE); addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); // Add the call site information to the DIE. const DILocation *IA = Scope->getInlinedAt(); - addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + addUInt(*ScopeDIE, dwarf::DW_AT_call_file, std::nullopt, getOrCreateSourceID(IA->getFile())); - addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, std::nullopt, IA->getLine()); if (IA->getColumn()) - addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn()); + addUInt(*ScopeDIE, dwarf::DW_AT_call_column, std::nullopt, IA->getColumn()); if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) - addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, + addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, std::nullopt, IA->getDiscriminator()); // Add name to the name table, we do this here because we're guaranteed @@ -845,7 +847,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, if (!DV.hasFrameIndexExprs()) return VariableDie; - Optional<unsigned> NVPTXAddressSpace; + std::optional<unsigned> NVPTXAddressSpace; DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (const auto &Fragment : DV.getFrameIndexExprs()) { @@ -893,7 +895,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // correctly interpret address space of the variable address. const unsigned NVPTX_ADDR_local_space = 6; addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, - NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space); + NVPTXAddressSpace.value_or(NVPTX_ADDR_local_space)); } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); if (DwarfExpr.TagOffset) @@ -1018,6 +1020,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) { DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); + auto *ContextCU = static_cast<DwarfCompileUnit *>(ScopeDIE.getUnit()); if (Scope) { assert(!Scope->getInlinedAt()); @@ -1025,8 +1028,10 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, // Collect lexical scope children first. 
// ObjectPointer might be a local (non-argument) local variable if it's a // block's synthetic this pointer. - if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) - addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + if (DIE *ObjectPointer = + ContextCU->createAndAddScopeChildren(Scope, ScopeDIE)) + ContextCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, + *ObjectPointer); } // If this is a variadic function, add an unspecified parameter. @@ -1124,7 +1129,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, - DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>() + DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>() : dwarf::DW_FORM_implicit_const, dwarf::DW_INL_inlined); if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) @@ -1588,7 +1593,8 @@ void DwarfCompileUnit::createBaseTypeDIEs() { "_" + Twine(Btr.BitSize)).toStringRef(Str)); addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding); // Round up to smallest number of bytes that contains this number of bits. - addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8)); + addUInt(Die, dwarf::DW_AT_byte_size, std::nullopt, + divideCeil(Btr.BitSize, 8)); Btr.Die = &Die; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 61412cde34c8..7d87f35021bb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -192,6 +192,7 @@ public: /// variables in this scope then create and insert DIEs for these /// variables. DIE &updateSubprogramScopeDIE(const DISubprogram *SP); + DIE &updateSubprogramScopeDIEImpl(const DISubprogram *SP, DIE *SPDie); void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE); @@ -204,9 +205,9 @@ public: void attachRangesOrLowHighPC(DIE &D, const SmallVectorImpl<InsnRange> &Ranges); - /// This scope represents inlined body of a function. Construct + /// This scope represents an inlined body of a function. Construct a /// DIE to represent this concrete inlined copy of the function. - DIE *constructInlinedScopeDIE(LexicalScope *Scope); + DIE *constructInlinedScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE); /// Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 54af14429907..cde790cc77fb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -56,6 +56,7 @@ #include <algorithm> #include <cstddef> #include <iterator> +#include <optional> #include <string> using namespace llvm; @@ -428,8 +429,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A) // https://sourceware.org/bugzilla/show_bug.cgi?id=11616 UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3; - // GDB does not fully support the DWARF 4 representation for bitfields. 
- UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB(); + UseDWARF2Bitfields = DwarfVersion < 4; // The DWARF v5 string offsets table has - possibly shared - contributions // from each compile and type unit each preceded by a header. The string @@ -597,6 +597,9 @@ struct FwdRegParamInfo { /// Register worklist for finding call site values. using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>; +/// Container for the set of registers known to be clobbered on the path to a +/// call site. +using ClobberedRegSet = SmallSet<Register, 16>; /// Append the expression \p Addition to \p Original and return the result. static const DIExpression *combineDIExpressions(const DIExpression *Original, @@ -668,7 +671,8 @@ static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg, /// Interpret values loaded into registers by \p CurMI. static void interpretValues(const MachineInstr *CurMI, FwdRegWorklist &ForwardedRegWorklist, - ParamSet &Params) { + ParamSet &Params, + ClobberedRegSet &ClobberedRegUnits) { const MachineFunction *MF = CurMI->getMF(); const DIExpression *EmptyExpr = @@ -700,17 +704,19 @@ static void interpretValues(const MachineInstr *CurMI, // If the MI is an instruction defining one or more parameters' forwarding // registers, add those defines. + ClobberedRegSet NewClobberedRegUnits; auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, SmallSetVector<unsigned, 4> &Defs) { if (MI.isDebugInstr()) return; for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && - Register::isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && MO.isDef() && MO.getReg().isPhysical()) { for (auto &FwdReg : ForwardedRegWorklist) if (TRI.regsOverlap(FwdReg.first, MO.getReg())) Defs.insert(FwdReg.first); + for (MCRegUnitIterator Units(MO.getReg(), &TRI); Units.isValid(); ++Units) + NewClobberedRegUnits.insert(*Units); } } }; @@ -719,8 +725,22 @@ static void interpretValues(const MachineInstr *CurMI, SmallSetVector<unsigned, 4> FwdRegDefs; getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs); - if (FwdRegDefs.empty()) + if (FwdRegDefs.empty()) { + // Any definitions by this instruction will clobber earlier reg movements. + ClobberedRegUnits.insert(NewClobberedRegUnits.begin(), + NewClobberedRegUnits.end()); return; + } + + // It's possible that we find a copy from a non-volatile register to the param + // register, which is clobbered in the meantime. Test for clobbered reg unit + // overlaps before completing. + auto IsRegClobberedInMeantime = [&](Register Reg) -> bool { + for (auto &RegUnit : ClobberedRegUnits) + if (TRI.hasRegUnit(Reg, RegUnit)) + return true; + return false; + }; for (auto ParamFwdReg : FwdRegDefs) { if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) { @@ -733,7 +753,8 @@ static void interpretValues(const MachineInstr *CurMI, Register SP = TLI.getStackPointerRegisterToSaveRestore(); Register FP = TRI.getFrameRegister(*MF); bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); - if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + if (!IsRegClobberedInMeantime(RegLoc) && + (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP)) { MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP); finishCallSiteParams(MLoc, ParamValue->second, ForwardedRegWorklist[ParamFwdReg], Params); @@ -755,6 +776,10 @@ static void interpretValues(const MachineInstr *CurMI, for (auto ParamFwdReg : FwdRegDefs) ForwardedRegWorklist.erase(ParamFwdReg); + // Any definitions by this instruction will clobber earlier reg movements. 
+ ClobberedRegUnits.insert(NewClobberedRegUnits.begin(), + NewClobberedRegUnits.end()); + // Now that we are done handling this instruction, add items from the // temporary worklist to the real one. for (auto &New : TmpWorklistItems) @@ -764,7 +789,8 @@ static void interpretValues(const MachineInstr *CurMI, static bool interpretNextInstr(const MachineInstr *CurMI, FwdRegWorklist &ForwardedRegWorklist, - ParamSet &Params) { + ParamSet &Params, + ClobberedRegSet &ClobberedRegUnits) { // Skip bundle headers. if (CurMI->isBundle()) return true; @@ -782,7 +808,7 @@ static bool interpretNextInstr(const MachineInstr *CurMI, if (CurMI->getNumOperands() == 0) return true; - interpretValues(CurMI, ForwardedRegWorklist, Params); + interpretValues(CurMI, ForwardedRegWorklist, Params, ClobberedRegUnits); return true; } @@ -834,6 +860,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin(); // Search for a loading value in forwarding registers inside call delay slot. + ClobberedRegSet ClobberedRegUnits; if (CallMI->hasDelaySlot()) { auto Suc = std::next(CallMI->getIterator()); // Only one-instruction delay slot is supported. @@ -842,14 +869,14 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, assert(std::next(Suc) == BundleEnd && "More than one instruction in call delay slot"); // Try to interpret value loaded by instruction. - if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params)) + if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params, ClobberedRegUnits)) return; } // Search for a loading value in forwarding registers. for (; I != MBB->rend(); ++I) { // Try to interpret values loaded by instruction. - if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params)) + if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params, ClobberedRegUnits)) return; } @@ -929,8 +956,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // the callee. const MachineOperand &CalleeOp = TII->getCalleeOperand(MI); if (!CalleeOp.isGlobal() && - (!CalleeOp.isReg() || - !Register::isPhysicalRegister(CalleeOp.getReg()))) + (!CalleeOp.isReg() || !CalleeOp.getReg().isPhysical())) continue; unsigned CallReg = 0; @@ -2004,6 +2030,17 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup)) return; const DebugLoc &DL = MI->getDebugLoc(); + unsigned Flags = 0; + + if (MI->getFlag(MachineInstr::FrameDestroy) && DL) { + const MachineBasicBlock *MBB = MI->getParent(); + if (MBB && (MBB != EpilogBeginBlock)) { + // First time FrameDestroy has been seen in this basic block + EpilogBeginBlock = MBB; + Flags |= DWARF2_FLAG_EPILOGUE_BEGIN; + } + } + // When we emit a line-0 record, we don't update PrevInstLoc; so look at // the last line number actually emitted, to see if it was line 0. unsigned LastAsmLine = @@ -2015,10 +2052,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { return; // We have an explicit location, same as the previous location. // But we might be coming back to it after a line 0 record. - if (LastAsmLine == 0 && DL.getLine() != 0) { + if ((LastAsmLine == 0 && DL.getLine() != 0) || Flags) { // Reinstate the source location but not marked as a statement. 
const MDNode *Scope = DL.getScope(); - recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } return; } @@ -2059,7 +2096,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // (The new location might be an explicit line 0, which we do emit.) if (DL.getLine() == 0 && LastAsmLine == 0) return; - unsigned Flags = 0; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; PrologEndLoc = DebugLoc(); @@ -3530,13 +3566,14 @@ void DwarfDebug::insertSectionLabel(const MCSymbol *S) { AddrPool.getIndex(S); } -Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const { +std::optional<MD5::MD5Result> +DwarfDebug::getMD5AsBytes(const DIFile *File) const { assert(File); if (getDwarfVersion() < 5) - return None; - Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); + return std::nullopt; + std::optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) - return None; + return std::nullopt; // Convert the string checksum to an MD5Result for the streamer. // The verifier validates the checksum so we assume it's okay. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 31e4081b7141..5d2ef8ee79a7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -116,7 +116,7 @@ class DbgVariable : public DbgEntity { /// Index of the entry list in DebugLocs. unsigned DebugLocListIndex = ~0u; /// DW_OP_LLVM_tag_offset value from DebugLocs. - Optional<uint8_t> DebugLocListTagOffset; + std::optional<uint8_t> DebugLocListTagOffset; /// Single value location description. std::unique_ptr<DbgValueLoc> ValueLoc = nullptr; @@ -175,7 +175,9 @@ public: void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } unsigned getDebugLocListIndex() const { return DebugLocListIndex; } void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; } - Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; } + std::optional<uint8_t> getDebugLocListTagOffset() const { + return DebugLocListTagOffset; + } StringRef getName() const { return getVariable()->getName(); } const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); } /// Get the FI entries, sorted by fragment offset. @@ -839,7 +841,7 @@ public: /// If the \p File has an MD5 checksum, return it as an MD5Result /// allocated in the MCContext. - Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const; + std::optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const; }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index e5cda4739fde..c2c11c7bc14d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -21,20 +21,7 @@ namespace llvm { class MachineFunction; class ARMTargetStreamer; -class LLVM_LIBRARY_VISIBILITY DwarfCFIExceptionBase : public EHStreamer { -protected: - DwarfCFIExceptionBase(AsmPrinter *A); - - /// Per-function flag to indicate if frame CFI info should be emitted. - bool shouldEmitCFI = false; - /// Per-module flag to indicate if .cfi_section has beeen emitted. 
- bool hasEmittedCFISections = false; - - void markFunctionEnd() override; - void endFragment() override; -}; - -class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { +class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public EHStreamer { /// Per-function flag to indicate if .cfi_personality should be emitted. bool shouldEmitPersonality = false; @@ -44,6 +31,17 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA = false; + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI = false; + + /// Per-module flag to indicate if .cfi_section has beeen emitted. + bool hasEmittedCFISections = false; + + /// Vector of all personality functions seen so far in the module. + std::vector<const GlobalValue *> Personalities; + + void addPersonality(const GlobalValue *Personality); + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -61,14 +59,17 @@ public: /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; - void beginFragment(const MachineBasicBlock *MBB, - ExceptionSymbolProvider ESP) override; - - void beginBasicBlock(const MachineBasicBlock &MBB) override; - void endBasicBlock(const MachineBasicBlock &MBB) override; + void beginBasicBlockSection(const MachineBasicBlock &MBB) override; + void endBasicBlockSection(const MachineBasicBlock &MBB) override; }; -class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { +class LLVM_LIBRARY_VISIBILITY ARMException : public EHStreamer { + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI = false; + + /// Per-module flag to indicate if .cfi_section has beeen emitted. + bool hasEmittedCFISections = false; + void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override; ARMTargetStreamer &getTargetStreamer(); @@ -88,9 +89,11 @@ public: /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; + + void markFunctionEnd() override; }; -class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase { +class LLVM_LIBRARY_VISIBILITY AIXException : public EHStreamer { /// This is AIX's compat unwind section, which unwinder would use /// to find the location of LSDA area and personality rountine. 
void emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym); @@ -98,11 +101,8 @@ class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase { public: AIXException(AsmPrinter *A); - void markFunctionEnd() override; - void endModule() override {} void beginFunction(const MachineFunction *MF) override {} - void endFunction(const MachineFunction *MF) override; }; } // End of namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 1c21d5ee8bb1..ab6967f50e30 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -99,7 +99,7 @@ void DwarfExpression::addAnd(unsigned Mask) { bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, llvm::Register MachineReg, unsigned MaxSize) { - if (!llvm::Register::isPhysicalRegister(MachineReg)) { + if (!MachineReg.isPhysical()) { if (isFrameRegister(TRI, MachineReg)) { DwarfRegs.push_back(Register::createRegister(-1, nullptr)); return true; @@ -494,7 +494,7 @@ bool DwarfExpression::addExpression( // and not any other parts of the following DWARF expression. assert(!IsEmittingEntryValue && "Can't emit entry value around expression"); - Optional<DIExpression::ExprOperand> PrevConvertOp = None; + std::optional<DIExpression::ExprOperand> PrevConvertOp; while (ExprCursor) { auto Op = ExprCursor.take(); @@ -604,7 +604,7 @@ bool DwarfExpression::addExpression( emitLegacySExt(PrevConvertOp->getArg(0)); else if (Encoding == dwarf::DW_ATE_unsigned) emitLegacyZExt(PrevConvertOp->getArg(0)); - PrevConvertOp = None; + PrevConvertOp = std::nullopt; } else { PrevConvertOp = Op; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index e605fe2f7d39..667a9efc6f6c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -15,13 +15,12 @@ #include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugInfoMetadata.h" #include <cassert> #include <cstdint> #include <iterator> +#include <optional> namespace llvm { @@ -53,9 +52,9 @@ public: DIExpressionCursor(const DIExpressionCursor &) = default; /// Consume one operation. - Optional<DIExpression::ExprOperand> take() { + std::optional<DIExpression::ExprOperand> take() { if (Start == End) - return None; + return std::nullopt; return *(Start++); } @@ -63,20 +62,20 @@ public: void consume(unsigned N) { std::advance(Start, N); } /// Return the current operation. - Optional<DIExpression::ExprOperand> peek() const { + std::optional<DIExpression::ExprOperand> peek() const { if (Start == End) - return None; + return std::nullopt; return *(Start); } /// Return the next operation. - Optional<DIExpression::ExprOperand> peekNext() const { + std::optional<DIExpression::ExprOperand> peekNext() const { if (Start == End) - return None; + return std::nullopt; auto Next = Start.getNext(); if (Next == End) - return None; + return std::nullopt; return *Next; } @@ -88,7 +87,7 @@ public: DIExpression::expr_op_iterator end() const { return End; } /// Retrieve the fragment information, if any. 
- Optional<DIExpression::FragmentInfo> getFragmentInfo() const { + std::optional<DIExpression::FragmentInfo> getFragmentInfo() const { return DIExpression::getFragmentInfo(Start, End); } }; @@ -170,7 +169,7 @@ public: bool isParameterValue() { return LocationFlags & CallSiteParamValue; } - Optional<uint8_t> TagOffset; + std::optional<uint8_t> TagOffset; protected: /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index a497aa07284e..3fe437a07c92 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -42,7 +42,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { // Skip CUs that ended up not being needed (split CUs that were abandoned // because they added no information beyond the non-split CU) - if (llvm::empty(TheU->getUnitDie().values())) + if (TheU->getUnitDie().values().empty()) return; Asm->OutStreamer->switchSection(S); @@ -66,7 +66,7 @@ void DwarfFile::computeSizeAndOffsets() { // Skip CUs that ended up not being needed (split CUs that were abandoned // because they added no information beyond the non-split CU) - if (llvm::empty(TheU->getUnitDie().values())) + if (TheU->getUnitDie().values().empty()) return; TheU->setDebugSectionOffset(SecOffset); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 67b72f0b455d..2292590b135e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -20,7 +20,7 @@ using namespace llvm; DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix) : Pool(A), Prefix(Prefix), - ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} + ShouldCreateSymbols(Asm.doesDwarfUseRelocationsAcrossSections()) {} StringMapEntry<DwarfStringPool::EntryTy> & DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 81238b0fe0d2..c2ff899c04ab 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -16,7 +16,6 @@ #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" @@ -219,7 +218,7 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { } void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, uint64_t Integer) { + std::optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); assert(Form != dwarf::DW_FORM_implicit_const && @@ -233,13 +232,13 @@ void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form, } void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, int64_t Integer) { + std::optional<dwarf::Form> Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); addAttribute(Die, Attribute, *Form, DIEInteger(Integer)); } -void DwarfUnit::addSInt(DIELoc &Die, 
Optional<dwarf::Form> Form, +void DwarfUnit::addSInt(DIELoc &Die, std::optional<dwarf::Form> Form, int64_t Integer) { addSInt(Die, (dwarf::Attribute)0, Form, Integer); } @@ -411,8 +410,8 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) { return; unsigned FileID = getOrCreateSourceID(File); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addUInt(Die, dwarf::DW_AT_decl_file, std::nullopt, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, std::nullopt, Line); } void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { @@ -705,12 +704,12 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { BTy->getEncoding()); uint64_t Size = BTy->getSizeInBits() >> 3; - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); if (BTy->isBigEndian()) - addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big); + addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_big); else if (BTy->isLittleEndian()) - addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little); + addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_little); } void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) { @@ -734,7 +733,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) { addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize()); } else { uint64_t Size = STy->getSizeInBits() >> 3; - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); } if (DIExpression *Expr = STy->getStringLocationExp()) { @@ -785,11 +784,14 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { && Tag != dwarf::DW_TAG_ptr_to_member_type && Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_rvalue_reference_type) - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType())); + + addAccess(Buffer, DTy->getFlags()); + // Add source line info if available and TyDesc is not a forward declaration. if (!DTy->isForwardDecl()) addSourceLine(Buffer, DTy); @@ -832,10 +834,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { // Add prototype flag if we're dealing with a C language and the function has // been prototyped. - uint16_t Language = getLanguage(); - if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || - Language == dwarf::DW_LANG_ObjC)) + if (isPrototyped && dwarf::isC((dwarf::SourceLanguage)getLanguage())) addFlag(Buffer, dwarf::DW_AT_prototyped); // Add a DW_AT_calling_convention if this has an explicit convention. 
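A recurring pattern in the DwarfUnit hunks above and below is passing std::nullopt (formerly None) as the form argument: addUInt/addSInt then fall back to DIEInteger::BestForm to pick the narrowest fixed-size form for the value. A small sketch of that fallback (the wrapper function name is illustrative):

#include "llvm/CodeGen/DIE.h"
#include <cstdint>

// Illustrative only: this is the fallback addUInt() uses when its form
// argument is std::nullopt, e.g. 200 -> DW_FORM_data1, 70000 -> DW_FORM_data4.
llvm::dwarf::Form bestUnsignedForm(uint64_t Value) {
  return llvm::DIEInteger::BestForm(/*IsSigned=*/false, Value);
}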
@@ -929,9 +928,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) { if (DD->isUnsignedDIType(Discriminator->getBaseType())) - addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); + addUInt(Variant, dwarf::DW_AT_discr_value, std::nullopt, + CI->getZExtValue()); else - addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); + addSInt(Variant, dwarf::DW_AT_discr_value, std::nullopt, + CI->getSExtValue()); } constructMemberDIE(Variant, DDTy); } else { @@ -951,7 +952,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (!SetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); if (unsigned PropertyAttributes = Property->getAttributes()) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, std::nullopt, PropertyAttributes); } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { if (Composite->getTag() == dwarf::DW_TAG_variant_part) { @@ -1017,10 +1018,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // TODO: Do we care about size for enum forward declarations? if (Size && (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, 0); // If we're a forward decl, say so. if (CTy->isForwardDecl()) @@ -1055,7 +1056,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE( addType(ParamDIE, TP->getType()); if (!TP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); - if (TP->isDefault() && (DD->getDwarfVersion() >= 5)) + if (TP->isDefault() && isCompatibleWithVersion(5)) addFlag(ParamDIE, dwarf::DW_AT_default_value); } @@ -1069,7 +1070,7 @@ void DwarfUnit::constructTemplateValueParameterDIE( addType(ParamDIE, VP->getType()); if (!VP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); - if (VP->isDefault() && (DD->getDwarfVersion() >= 5)) + if (VP->isDefault() && isCompatibleWithVersion(5)) addFlag(ParamDIE, dwarf::DW_AT_default_value); if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) @@ -1139,10 +1140,10 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { if (!M->getAPINotesFile().empty()) addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile()); if (M->getFile()) - addUInt(MDie, dwarf::DW_AT_decl_file, None, + addUInt(MDie, dwarf::DW_AT_decl_file, std::nullopt, getOrCreateSourceID(M->getFile())); if (M->getLineNo()) - addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo()); + addUInt(MDie, dwarf::DW_AT_decl_line, std::nullopt, M->getLineNo()); if (M->getIsDecl()) addFlag(MDie, dwarf::DW_AT_declaration); @@ -1205,10 +1206,10 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); unsigned DefID = getOrCreateSourceID(SP->getFile()); if (DeclID != DefID) - addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); + addUInt(SPDie, dwarf::DW_AT_decl_file, std::nullopt, DefID); if (SP->getLine() != SPDecl->getLine()) - addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); + 
addUInt(SPDie, dwarf::DW_AT_decl_line, std::nullopt, SP->getLine()); } } @@ -1259,10 +1260,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, // Add the prototype if we have a prototype and we have a C like // language. - uint16_t Language = getLanguage(); - if (SP->isPrototyped() && - (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || - Language == dwarf::DW_LANG_ObjC)) + if (SP->isPrototyped() && dwarf::isC((dwarf::SourceLanguage)getLanguage())) addFlag(SPDie, dwarf::DW_AT_prototyped); if (SP->isObjCDirect()) @@ -1376,7 +1374,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, } else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) { if (Attr == dwarf::DW_AT_count) { if (BI->getSExtValue() != -1) - addUInt(DW_Subrange, Attr, None, BI->getSExtValue()); + addUInt(DW_Subrange, Attr, std::nullopt, BI->getSExtValue()); } else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || BI->getSExtValue() != DefaultLowerBound) addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue()); @@ -1437,7 +1435,7 @@ DIE *DwarfUnit::getIndexTyDie() { IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); StringRef Name = "__ARRAY_SIZE_TYPE__"; addString(*IndexTyDie, dwarf::DW_AT_name, Name); - addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, std::nullopt, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::getArrayIndexTypeEncoding( (dwarf::SourceLanguage)getLanguage())); @@ -1478,7 +1476,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isVector()) { addFlag(Buffer, dwarf::DW_AT_GNU_vector); if (hasVectorBeenPadded(CTy)) - addUInt(Buffer, dwarf::DW_AT_byte_size, None, + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, CTy->getSizeInBits() / CHAR_BIT); } @@ -1625,12 +1623,12 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { uint32_t AlignInBytes = DT->getAlignInBytes(); uint64_t OffsetInBytes; - bool IsBitfield = FieldSize && Size != FieldSize; + bool IsBitfield = DT->isBitField(); if (IsBitfield) { // Handle bitfield, assume bytes are 8 bits. if (DD->useDWARF2Bitfields()) - addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); - addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); + addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size); uint64_t Offset = DT->getOffsetInBits(); // We can't use DT->getAlignInBits() here: AlignInBits for member type @@ -1652,10 +1650,10 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { if (Asm->getDataLayout().isLittleEndian()) Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, Offset); OffsetInBytes = FieldOffset >> 3; } else { - addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset); + addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset); } } else { // This is not a bitfield. 
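The bitfield hunk just above keeps both encodings: the DWARF 2/3 path emits DW_AT_bit_offset counted from the most-significant end of the storage unit (hence the little-endian flip), while the DWARF 4+ path emits DW_AT_data_bit_offset directly. (Which path is taken is now purely a DWARF-version check, per the UseDWARF2Bitfields change earlier in this diff.) A worked example, assuming a hypothetical member whose 32-bit storage unit starts at the beginning of the struct:

#include <cstdint>

int main() {
  // Hypothetical member: a 5-bit field at bit offset 8 of a 32-bit unit.
  const uint64_t FieldSize = 32, Offset = 8, Size = 5;
  // Little-endian DWARF 2/3 encoding, using the formula from the hunk above.
  const uint64_t BitOffset = FieldSize - (Offset + Size); // DW_AT_bit_offset = 19
  // DWARF 4+ encoding: bit offset from the start of the containing entity.
  const uint64_t DataBitOffset = Offset;                  // DW_AT_data_bit_offset = 8
  return (BitOffset == 19 && DataBitOffset == 8) ? 0 : 1;
}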
@@ -1679,7 +1677,7 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addUInt(MemberDie, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_udata, OffsetInBytes); else - addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + addUInt(MemberDie, dwarf::DW_AT_data_member_location, std::nullopt, OffsetInBytes); } } @@ -1798,7 +1796,7 @@ void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label, const MCSymbol *Sec) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + if (Asm->doesDwarfUseRelocationsAcrossSections()) addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label); else addSectionDelta(Die, Attribute, Label, Sec); @@ -1821,7 +1819,7 @@ void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die, } const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const { - if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + if (!Asm->doesDwarfUseRelocationsAcrossSections()) return nullptr; if (isDwoUnit()) return nullptr; @@ -1847,3 +1845,7 @@ void DwarfUnit::addRnglistsBase() { void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { DD->getAddressPool().resetUsedFlag(true); } + +bool DwarfUnit::isCompatibleWithVersion(uint16_t Version) const { + return !Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= Version; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 48d63d126701..0caa6adbfa62 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -15,10 +15,10 @@ #include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/Target/TargetMachine.h" +#include <optional> #include <string> namespace llvm { @@ -143,15 +143,15 @@ public: /// Add an unsigned integer attribute data and value. void addUInt(DIEValueList &Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, uint64_t Integer); + std::optional<dwarf::Form> Form, uint64_t Integer); void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer); /// Add an signed integer attribute data and value. void addSInt(DIEValueList &Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, int64_t Integer); + std::optional<dwarf::Form> Form, int64_t Integer); - void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); + void addSInt(DIELoc &Die, std::optional<dwarf::Form> Form, int64_t Integer); /// Add a string attribute data and value. /// @@ -350,6 +350,10 @@ private: virtual bool isDwoUnit() const = 0; const MCSymbol *getCrossSectionRelativeBaseAddress() const override; + + /// Returns 'true' if the current DwarfVersion is compatible + /// with the specified \p Version. 
+ bool isCompatibleWithVersion(uint16_t Version) const; }; class DwarfTypeUnit final : public DwarfUnit { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 31644959bdca..67e2c0e07095 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -195,6 +195,12 @@ void EHStreamer::computePadMap( const LandingPadInfo *LandingPad = LandingPads[i]; for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; + MCSymbol *EndLabel = LandingPad->BeginLabels[j]; + // If we have deleted the code for a given invoke after registering it in + // the LandingPad label list, the associated symbols will not have been + // emitted. In that case, ignore this callsite entry. + if (!BeginLabel->isDefined() || !EndLabel->isDefined()) + continue; assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); PadRange P = { i, j }; PadMap[BeginLabel] = P; @@ -383,8 +389,14 @@ MCSymbol *EHStreamer::emitExceptionTable() { SmallVector<const LandingPadInfo *, 64> LandingPads; LandingPads.reserve(PadInfos.size()); - for (const LandingPadInfo &LPI : PadInfos) + for (const LandingPadInfo &LPI : PadInfos) { + // If a landing-pad has an associated label, but the label wasn't ever + // emitted, then skip it. (This can occur if the landingpad's MBB was + // deleted). + if (LPI.LandingPadLabel && !LPI.LandingPadLabel->isDefined()) + continue; LandingPads.push_back(&LPI); + } // Order landing pads lexicographically by type id. llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) { @@ -663,9 +675,10 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel); // Emit the LSDA header. - // If only one call-site range exists, LPStart is omitted as it is the - // same as the function entry. - if (CallSiteRanges.size() == 1) { + // LPStart is omitted if either we have a single call-site range (in which + // case the function entry is treated as @LPStart) or if this function has + // no landing pads (in which case @LPStart is undefined). 
+ if (CallSiteRanges.size() == 1 || LandingPadRange == nullptr) { Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); } else if (!Asm->isPositionIndependent()) { // For more than one call-site ranges, LPStart must be explicitly diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 135eabc34838..3e75b4371033 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -48,5 +48,6 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, } SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack)); - Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); + Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack, + Asm->CurrentFnSym); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index a514ff161cee..bf65e525dde1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -42,16 +42,6 @@ void WasmException::endModule() { } } -void WasmException::markFunctionEnd() { - // Get rid of any dead landing pads. - if (!Asm->MF->getLandingPads().empty()) { - auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF); - // Wasm does not set BeginLabel and EndLabel information for landing pads, - // so we should set the second argument false. - NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false); - } -} - void WasmException::endFunction(const MachineFunction *MF) { bool ShouldEmitExceptionTable = false; for (const LandingPadInfo &Info : MF->getLandingPads()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h index 419b569d123c..86cc37dfde07 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h @@ -28,7 +28,6 @@ public: void endModule() override; void beginFunction(const MachineFunction *MF) override {} - void markFunctionEnd() override; void endFunction(const MachineFunction *MF) override; protected: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index c3ca9c92bf71..7a800438592c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -130,14 +130,6 @@ void WinException::endFunction(const MachineFunction *MF) { if (F.hasPersonalityFn()) Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); - // Get rid of any dead landing pads if we're not using funclets. In funclet - // schemes, the landing pad is not actually reachable. It only exists so - // that we can emit the right table data. - if (!isFuncletEHPersonality(Per)) { - MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF); - NonConstMF->tidyLandingPads(); - } - endFuncletImpl(); // endFunclet will emit the necessary .xdata tables for table-based SEH. @@ -736,7 +728,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. // EHFlags & 2 -> ??? 
// EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. - OS.emitValueToAlignment(4); + OS.emitValueToAlignment(Align(4)); OS.emitLabel(FuncInfoXData); AddComment("MagicNumber"); @@ -1010,7 +1002,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // Emit the __ehtable label that we use for llvm.x86.seh.lsda. MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); - OS.emitValueToAlignment(4); + OS.emitValueToAlignment(Align(4)); OS.emitLabel(LSDALabel); const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp new file mode 100644 index 000000000000..7098824dbe4b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -0,0 +1,2426 @@ +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Analysis/Interval.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PrintPasses.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <assert.h> +#include <cstdint> +#include <optional> +#include <sstream> +#include <unordered_map> + +using namespace llvm; +#define DEBUG_TYPE "debug-ata" + +STATISTIC(NumDefsScanned, "Number of dbg locs that get scanned for removal"); +STATISTIC(NumDefsRemoved, "Number of dbg locs removed"); +STATISTIC(NumWedgesScanned, "Number of dbg wedges scanned"); +STATISTIC(NumWedgesChanged, "Number of dbg wedges changed"); + +static cl::opt<unsigned> + MaxNumBlocks("debug-ata-max-blocks", cl::init(10000), + cl::desc("Maximum num basic blocks before debug info dropped"), + cl::Hidden); +/// Option for debugging the pass, determines if the memory location fragment +/// filling happens after generating the variable locations. +static cl::opt<bool> EnableMemLocFragFill("mem-loc-frag-fill", cl::init(true), + cl::Hidden); +/// Print the results of the analysis. Respects -filter-print-funcs. +static cl::opt<bool> PrintResults("print-debug-ata", cl::init(false), + cl::Hidden); + +// Implicit conversions are disabled for enum class types, so unfortunately we +// need to create a DenseMapInfo wrapper around the specified underlying type. +template <> struct llvm::DenseMapInfo<VariableID> { + using Wrapped = DenseMapInfo<unsigned>; + static inline VariableID getEmptyKey() { + return static_cast<VariableID>(Wrapped::getEmptyKey()); + } + static inline VariableID getTombstoneKey() { + return static_cast<VariableID>(Wrapped::getTombstoneKey()); + } + static unsigned getHashValue(const VariableID &Val) { + return Wrapped::getHashValue(static_cast<unsigned>(Val)); + } + static bool isEqual(const VariableID &LHS, const VariableID &RHS) { + return LHS == RHS; + } +}; + +/// Helper class to build FunctionVarLocs, since that class isn't easy to +/// modify. 
TODO: There's not a great deal of value in the split, it could be +/// worth merging the two classes. +class FunctionVarLocsBuilder { + friend FunctionVarLocs; + UniqueVector<DebugVariable> Variables; + // Use an unordered_map so we don't invalidate iterators after + // insert/modifications. + std::unordered_map<const Instruction *, SmallVector<VarLocInfo>> + VarLocsBeforeInst; + + SmallVector<VarLocInfo> SingleLocVars; + +public: + /// Find or insert \p V and return the ID. + VariableID insertVariable(DebugVariable V) { + return static_cast<VariableID>(Variables.insert(V)); + } + + /// Get a variable from its \p ID. + const DebugVariable &getVariable(VariableID ID) const { + return Variables[static_cast<unsigned>(ID)]; + } + + /// Return ptr to wedge of defs or nullptr if no defs come just before /p + /// Before. + const SmallVectorImpl<VarLocInfo> *getWedge(const Instruction *Before) const { + auto R = VarLocsBeforeInst.find(Before); + if (R == VarLocsBeforeInst.end()) + return nullptr; + return &R->second; + } + + /// Replace the defs that come just before /p Before with /p Wedge. + void setWedge(const Instruction *Before, SmallVector<VarLocInfo> &&Wedge) { + VarLocsBeforeInst[Before] = std::move(Wedge); + } + + /// Add a def for a variable that is valid for its lifetime. + void addSingleLocVar(DebugVariable Var, DIExpression *Expr, DebugLoc DL, + Value *V) { + VarLocInfo VarLoc; + VarLoc.VariableID = insertVariable(Var); + VarLoc.Expr = Expr; + VarLoc.DL = DL; + VarLoc.V = V; + SingleLocVars.emplace_back(VarLoc); + } + + /// Add a def to the wedge of defs just before /p Before. + void addVarLoc(Instruction *Before, DebugVariable Var, DIExpression *Expr, + DebugLoc DL, Value *V) { + VarLocInfo VarLoc; + VarLoc.VariableID = insertVariable(Var); + VarLoc.Expr = Expr; + VarLoc.DL = DL; + VarLoc.V = V; + VarLocsBeforeInst[Before].emplace_back(VarLoc); + } +}; + +void FunctionVarLocs::print(raw_ostream &OS, const Function &Fn) const { + // Print the variable table first. TODO: Sorting by variable could make the + // output more stable? + unsigned Counter = -1; + OS << "=== Variables ===\n"; + for (const DebugVariable &V : Variables) { + ++Counter; + // Skip first entry because it is a dummy entry. + if (Counter == 0) { + continue; + } + OS << "[" << Counter << "] " << V.getVariable()->getName(); + if (auto F = V.getFragment()) + OS << " bits [" << F->OffsetInBits << ", " + << F->OffsetInBits + F->SizeInBits << ")"; + if (const auto *IA = V.getInlinedAt()) + OS << " inlined-at " << *IA; + OS << "\n"; + } + + auto PrintLoc = [&OS](const VarLocInfo &Loc) { + OS << "DEF Var=[" << (unsigned)Loc.VariableID << "]" + << " Expr=" << *Loc.Expr << " V=" << *Loc.V << "\n"; + }; + + // Print the single location variables. + OS << "=== Single location vars ===\n"; + for (auto It = single_locs_begin(), End = single_locs_end(); It != End; + ++It) { + PrintLoc(*It); + } + + // Print the non-single-location defs in line with IR. + OS << "=== In-line variable defs ==="; + for (const BasicBlock &BB : Fn) { + OS << "\n" << BB.getName() << ":\n"; + for (const Instruction &I : BB) { + for (auto It = locs_begin(&I), End = locs_end(&I); It != End; ++It) { + PrintLoc(*It); + } + OS << I << "\n"; + } + } +} + +void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) { + // Add the single-location variables first. + for (const auto &VarLoc : Builder.SingleLocVars) + VarLocRecords.emplace_back(VarLoc); + // Mark the end of the section. 
+ SingleVarLocEnd = VarLocRecords.size(); + + // Insert a contiguous block of VarLocInfos for each instruction, mapping it + // to the start and end position in the vector with VarLocsBeforeInst. + for (auto &P : Builder.VarLocsBeforeInst) { + unsigned BlockStart = VarLocRecords.size(); + for (const VarLocInfo &VarLoc : P.second) + VarLocRecords.emplace_back(VarLoc); + unsigned BlockEnd = VarLocRecords.size(); + // Record the start and end indices. + if (BlockEnd != BlockStart) + VarLocsBeforeInst[P.first] = {BlockStart, BlockEnd}; + } + + // Copy the Variables vector from the builder's UniqueVector. + assert(Variables.empty() && "Expect clear before init"); + // UniqueVectors IDs are one-based (which means the VarLocInfo VarID values + // are one-based) so reserve an extra and insert a dummy. + Variables.reserve(Builder.Variables.size() + 1); + Variables.push_back(DebugVariable(nullptr, std::nullopt, nullptr)); + Variables.append(Builder.Variables.begin(), Builder.Variables.end()); +} + +void FunctionVarLocs::clear() { + Variables.clear(); + VarLocRecords.clear(); + VarLocsBeforeInst.clear(); + SingleVarLocEnd = 0; +} + +/// Walk backwards along constant GEPs and bitcasts to the base storage from \p +/// Start as far as possible. Prepend \Expression with the offset and append it +/// with a DW_OP_deref that haes been implicit until now. Returns the walked-to +/// value and modified expression. +static std::pair<Value *, DIExpression *> +walkToAllocaAndPrependOffsetDeref(const DataLayout &DL, Value *Start, + DIExpression *Expression) { + APInt OffsetInBytes(DL.getTypeSizeInBits(Start->getType()), false); + Value *End = + Start->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetInBytes); + SmallVector<uint64_t, 3> Ops; + if (OffsetInBytes.getBoolValue()) { + Ops = {dwarf::DW_OP_plus_uconst, OffsetInBytes.getZExtValue()}; + Expression = DIExpression::prependOpcodes( + Expression, Ops, /*StackValue=*/false, /*EntryValue=*/false); + } + Expression = DIExpression::append(Expression, {dwarf::DW_OP_deref}); + return {End, Expression}; +} + +/// Extract the offset used in \p DIExpr. Returns std::nullopt if the expression +/// doesn't explicitly describe a memory location with DW_OP_deref or if the +/// expression is too complex to interpret. +static std::optional<int64_t> +getDerefOffsetInBytes(const DIExpression *DIExpr) { + int64_t Offset = 0; + const unsigned NumElements = DIExpr->getNumElements(); + const auto Elements = DIExpr->getElements(); + unsigned NextElement = 0; + // Extract the offset. + if (NumElements > 2 && Elements[0] == dwarf::DW_OP_plus_uconst) { + Offset = Elements[1]; + NextElement = 2; + } else if (NumElements > 3 && Elements[0] == dwarf::DW_OP_constu) { + NextElement = 3; + if (Elements[2] == dwarf::DW_OP_plus) + Offset = Elements[1]; + else if (Elements[2] == dwarf::DW_OP_minus) + Offset = -Elements[1]; + else + return std::nullopt; + } + + // If that's all there is it means there's no deref. + if (NextElement >= NumElements) + return std::nullopt; + + // Check the next element is DW_OP_deref - otherwise this is too complex or + // isn't a deref expression. + if (Elements[NextElement] != dwarf::DW_OP_deref) + return std::nullopt; + + // Check the final operation is either the DW_OP_deref or is a fragment. + if (NumElements == NextElement + 1) + return Offset; // Ends with deref. + else if (NumElements == NextElement + 3 && + Elements[NextElement] == dwarf::DW_OP_LLVM_fragment) + return Offset; // Ends with deref + fragment. 
+ + // Don't bother trying to interpret anything more complex. + return std::nullopt; +} + +/// A whole (unfragmented) source variable. +using DebugAggregate = std::pair<const DILocalVariable *, const DILocation *>; +static DebugAggregate getAggregate(const DbgVariableIntrinsic *DII) { + return DebugAggregate(DII->getVariable(), DII->getDebugLoc().getInlinedAt()); +} +static DebugAggregate getAggregate(const DebugVariable &Var) { + return DebugAggregate(Var.getVariable(), Var.getInlinedAt()); +} + +namespace { +/// In dwarf emission, the following sequence +/// 1. dbg.value ... Fragment(0, 64) +/// 2. dbg.value ... Fragment(0, 32) +/// effectively sets Fragment(32, 32) to undef (each def sets all bits not in +/// the intersection of the fragments to having "no location"). This makes +/// sense for implicit location values because splitting the computed values +/// could be troublesome, and is probably quite uncommon. When we convert +/// dbg.assigns to dbg.value+deref this kind of thing is common, and describing +/// a location (memory) rather than a value means we don't need to worry about +/// splitting any values, so we try to recover the rest of the fragment +/// location here. +/// This class performs a(nother) dataflow analysis over the function, adding +/// variable locations so that any bits of a variable with a memory location +/// have that location explicitly reinstated at each subsequent variable +/// location definition that that doesn't overwrite those bits. i.e. after a +/// variable location def, insert new defs for the memory location with +/// fragments for the difference of "all bits currently in memory" and "the +/// fragment of the second def". +class MemLocFragmentFill { + Function &Fn; + FunctionVarLocsBuilder *FnVarLocs; + const DenseSet<DebugAggregate> *VarsWithStackSlot; + + // 0 = no memory location. + using BaseAddress = unsigned; + using OffsetInBitsTy = unsigned; + using FragTraits = IntervalMapHalfOpenInfo<OffsetInBitsTy>; + using FragsInMemMap = IntervalMap< + OffsetInBitsTy, BaseAddress, + IntervalMapImpl::NodeSizer<OffsetInBitsTy, BaseAddress>::LeafSize, + FragTraits>; + FragsInMemMap::Allocator IntervalMapAlloc; + using VarFragMap = DenseMap<unsigned, FragsInMemMap>; + + /// IDs for memory location base addresses in maps. Use 0 to indicate that + /// there's no memory location. + UniqueVector<Value *> Bases; + UniqueVector<DebugAggregate> Aggregates; + DenseMap<const BasicBlock *, VarFragMap> LiveIn; + DenseMap<const BasicBlock *, VarFragMap> LiveOut; + + struct FragMemLoc { + unsigned Var; + unsigned Base; + unsigned OffsetInBits; + unsigned SizeInBits; + DebugLoc DL; + }; + using InsertMap = MapVector<Instruction *, SmallVector<FragMemLoc>>; + + /// BBInsertBeforeMap holds a description for the set of location defs to be + /// inserted after the analysis is complete. It is updated during the dataflow + /// and the entry for a block is CLEARED each time it is (re-)visited. After + /// the dataflow is complete, each block entry will contain the set of defs + /// calculated during the final (fixed-point) iteration. + DenseMap<const BasicBlock *, InsertMap> BBInsertBeforeMap; + + static bool intervalMapsAreEqual(const FragsInMemMap &A, + const FragsInMemMap &B) { + auto AIt = A.begin(), AEnd = A.end(); + auto BIt = B.begin(), BEnd = B.end(); + for (; AIt != AEnd; ++AIt, ++BIt) { + if (BIt == BEnd) + return false; // B has fewer elements than A. + if (AIt.start() != BIt.start() || AIt.stop() != BIt.stop()) + return false; // Interval is different. 
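// A standalone sketch of the shape matching done by getDerefOffsetInBytes()
// above. The enum below is an assumed stand-in for the dwarf::DW_OP_*
// constants; only the simple shapes handled above are recognised: an optional
// plus_uconst or constu/plus or constu/minus prefix followed by a deref.
#include <cstdint>
#include <optional>
#include <vector>

enum SketchOp : uint64_t { PlusUconst, Constu, Plus, Minus, Deref };

std::optional<int64_t> derefOffsetSketch(const std::vector<uint64_t> &E) {
  int64_t Offset = 0;
  unsigned Next = 0;
  // Optional constant-offset prefix.
  if (E.size() > 2 && E[0] == PlusUconst) {
    Offset = static_cast<int64_t>(E[1]);
    Next = 2;
  } else if (E.size() > 3 && E[0] == Constu) {
    if (E[2] == Plus)
      Offset = static_cast<int64_t>(E[1]);
    else if (E[2] == Minus)
      Offset = -static_cast<int64_t>(E[1]);
    else
      return std::nullopt;
    Next = 3;
  }
  // A deref must follow, otherwise this isn't a memory location.
  if (Next >= E.size() || E[Next] != Deref)
    return std::nullopt;
  if (Next + 1 == E.size())
    return Offset; // ends with the deref
  // The real code additionally tolerates a trailing fragment operation;
  // anything else is treated as too complex.
  return std::nullopt;
}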
+ if (*AIt != *BIt) + return false; // Value at interval is different. + } + // AIt == AEnd. Check BIt is also now at end. + return BIt == BEnd; + } + + static bool varFragMapsAreEqual(const VarFragMap &A, const VarFragMap &B) { + if (A.size() != B.size()) + return false; + for (const auto &APair : A) { + auto BIt = B.find(APair.first); + if (BIt == B.end()) + return false; + if (!intervalMapsAreEqual(APair.second, BIt->second)) + return false; + } + return true; + } + + /// Return a string for the value that \p BaseID represents. + std::string toString(unsigned BaseID) { + if (BaseID) + return Bases[BaseID]->getName().str(); + else + return "None"; + } + + /// Format string describing an FragsInMemMap (IntervalMap) interval. + std::string toString(FragsInMemMap::const_iterator It, bool Newline = true) { + std::string String; + std::stringstream S(String); + if (It.valid()) { + S << "[" << It.start() << ", " << It.stop() + << "): " << toString(It.value()); + } else { + S << "invalid iterator (end)"; + } + if (Newline) + S << "\n"; + return S.str(); + }; + + FragsInMemMap meetFragments(const FragsInMemMap &A, const FragsInMemMap &B) { + FragsInMemMap Result(IntervalMapAlloc); + for (auto AIt = A.begin(), AEnd = A.end(); AIt != AEnd; ++AIt) { + LLVM_DEBUG(dbgs() << "a " << toString(AIt)); + // This is basically copied from process() and inverted (process is + // performing something like a union whereas this is more of an + // intersect). + + // There's no work to do if interval `a` overlaps no fragments in map `B`. + if (!B.overlaps(AIt.start(), AIt.stop())) + continue; + + // Does StartBit intersect an existing fragment? + auto FirstOverlap = B.find(AIt.start()); + assert(FirstOverlap != B.end()); + bool IntersectStart = FirstOverlap.start() < AIt.start(); + LLVM_DEBUG(dbgs() << "- FirstOverlap " << toString(FirstOverlap, false) + << ", IntersectStart: " << IntersectStart << "\n"); + + // Does EndBit intersect an existing fragment? + auto LastOverlap = B.find(AIt.stop()); + bool IntersectEnd = + LastOverlap != B.end() && LastOverlap.start() < AIt.stop(); + LLVM_DEBUG(dbgs() << "- LastOverlap " << toString(LastOverlap, false) + << ", IntersectEnd: " << IntersectEnd << "\n"); + + // Check if both ends of `a` intersect the same interval `b`. + if (IntersectStart && IntersectEnd && FirstOverlap == LastOverlap) { + // Insert `a` (`a` is contained in `b`) if the values match. + // [ a ] + // [ - b - ] + // - + // [ r ] + LLVM_DEBUG(dbgs() << "- a is contained within " + << toString(FirstOverlap)); + if (*AIt && *AIt == *FirstOverlap) + Result.insert(AIt.start(), AIt.stop(), *AIt); + } else { + // There's an overlap but `a` is not fully contained within + // `b`. Shorten any end-point intersections. + // [ - a - ] + // [ - b - ] + // - + // [ r ] + auto Next = FirstOverlap; + if (IntersectStart) { + LLVM_DEBUG(dbgs() << "- insert intersection of a and " + << toString(FirstOverlap)); + if (*AIt && *AIt == *FirstOverlap) + Result.insert(AIt.start(), FirstOverlap.stop(), *AIt); + ++Next; + } + // [ - a - ] + // [ - b - ] + // - + // [ r ] + if (IntersectEnd) { + LLVM_DEBUG(dbgs() << "- insert intersection of a and " + << toString(LastOverlap)); + if (*AIt && *AIt == *LastOverlap) + Result.insert(LastOverlap.start(), AIt.stop(), *AIt); + } + + // Insert all intervals in map `B` that are contained within interval + // `a` where the values match. 
+ // [ - - a - - ] + // [ b1 ] [ b2 ] + // - + // [ r1 ] [ r2 ] + while (Next != B.end() && Next.start() < AIt.stop() && + Next.stop() <= AIt.stop()) { + LLVM_DEBUG(dbgs() + << "- insert intersection of a and " << toString(Next)); + if (*AIt && *AIt == *Next) + Result.insert(Next.start(), Next.stop(), *Next); + ++Next; + } + } + } + return Result; + } + + /// Meet \p A and \p B, storing the result in \p A. + void meetVars(VarFragMap &A, const VarFragMap &B) { + // Meet A and B. + // + // Result = meet(a, b) for a in A, b in B where Var(a) == Var(b) + for (auto It = A.begin(), End = A.end(); It != End; ++It) { + unsigned AVar = It->first; + FragsInMemMap &AFrags = It->second; + auto BIt = B.find(AVar); + if (BIt == B.end()) { + A.erase(It); + continue; // Var has no bits defined in B. + } + LLVM_DEBUG(dbgs() << "meet fragment maps for " + << Aggregates[AVar].first->getName() << "\n"); + AFrags = meetFragments(AFrags, BIt->second); + } + } + + bool meet(const BasicBlock &BB, + const SmallPtrSet<BasicBlock *, 16> &Visited) { + LLVM_DEBUG(dbgs() << "meet block info from preds of " << BB.getName() + << "\n"); + + VarFragMap BBLiveIn; + bool FirstMeet = true; + // LiveIn locs for BB is the meet of the already-processed preds' LiveOut + // locs. + for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) { + // Ignore preds that haven't been processed yet. This is essentially the + // same as initialising all variables to implicit top value (⊤) which is + // the identity value for the meet operation. + const BasicBlock *Pred = *I; + if (!Visited.count(Pred)) + continue; + + auto PredLiveOut = LiveOut.find(Pred); + assert(PredLiveOut != LiveOut.end()); + + if (FirstMeet) { + LLVM_DEBUG(dbgs() << "BBLiveIn = " << Pred->getName() << "\n"); + BBLiveIn = PredLiveOut->second; + FirstMeet = false; + } else { + LLVM_DEBUG(dbgs() << "BBLiveIn = meet BBLiveIn, " << Pred->getName() + << "\n"); + meetVars(BBLiveIn, PredLiveOut->second); + } + + // An empty set is ⊥ for the intersect-like meet operation. If we've + // already got ⊥ there's no need to run the code - we know the result is + // ⊥ since `meet(a, ⊥) = ⊥`. + if (BBLiveIn.size() == 0) + break; + } + + auto CurrentLiveInEntry = LiveIn.find(&BB); + // If there's no LiveIn entry for the block yet, add it. + if (CurrentLiveInEntry == LiveIn.end()) { + LLVM_DEBUG(dbgs() << "change=true (first) on meet on " << BB.getName() + << "\n"); + LiveIn[&BB] = std::move(BBLiveIn); + return /*Changed=*/true; + } + + // If the LiveIn set has changed (expensive check) update it and return + // true. 
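// A standalone sketch (plain std::map, int stand-ins for the real types) of
// the meet direction used by meetVars() above, the dual of the joins further
// down: only keys present in both maps survive and their values are met
// element-wise, so an empty map behaves as bottom (⊥).
#include <map>

template <typename MeetFn>
std::map<int, int> meetMaps(const std::map<int, int> &A,
                            const std::map<int, int> &B, MeetFn Meet) {
  std::map<int, int> Result;
  for (const auto &[Key, AValue] : A) {
    auto BIt = B.find(Key);
    if (BIt != B.end()) // keys present in only one side are dropped
      Result[Key] = Meet(AValue, BIt->second);
  }
  return Result;
}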
+ if (!varFragMapsAreEqual(BBLiveIn, CurrentLiveInEntry->second)) { + LLVM_DEBUG(dbgs() << "change=true on meet on " << BB.getName() << "\n"); + CurrentLiveInEntry->second = std::move(BBLiveIn); + return /*Changed=*/true; + } + + LLVM_DEBUG(dbgs() << "change=false on meet on " << BB.getName() << "\n"); + return /*Changed=*/false; + } + + void insertMemLoc(BasicBlock &BB, Instruction &Before, unsigned Var, + unsigned StartBit, unsigned EndBit, unsigned Base, + DebugLoc DL) { + assert(StartBit < EndBit && "Cannot create fragment of size <= 0"); + if (!Base) + return; + FragMemLoc Loc; + Loc.Var = Var; + Loc.OffsetInBits = StartBit; + Loc.SizeInBits = EndBit - StartBit; + assert(Base && "Expected a non-zero ID for Base address"); + Loc.Base = Base; + Loc.DL = DL; + BBInsertBeforeMap[&BB][&Before].push_back(Loc); + LLVM_DEBUG(dbgs() << "Add mem def for " << Aggregates[Var].first->getName() + << " bits [" << StartBit << ", " << EndBit << ")\n"); + } + + void addDef(const VarLocInfo &VarLoc, Instruction &Before, BasicBlock &BB, + VarFragMap &LiveSet) { + DebugVariable DbgVar = FnVarLocs->getVariable(VarLoc.VariableID); + if (skipVariable(DbgVar.getVariable())) + return; + // Don't bother doing anything for this variables if we know it's fully + // promoted. We're only interested in variables that (sometimes) live on + // the stack here. + if (!VarsWithStackSlot->count(getAggregate(DbgVar))) + return; + unsigned Var = Aggregates.insert( + DebugAggregate(DbgVar.getVariable(), VarLoc.DL.getInlinedAt())); + + // [StartBit: EndBit) are the bits affected by this def. + const DIExpression *DIExpr = VarLoc.Expr; + unsigned StartBit; + unsigned EndBit; + if (auto Frag = DIExpr->getFragmentInfo()) { + StartBit = Frag->OffsetInBits; + EndBit = StartBit + Frag->SizeInBits; + } else { + assert(static_cast<bool>(DbgVar.getVariable()->getSizeInBits())); + StartBit = 0; + EndBit = *DbgVar.getVariable()->getSizeInBits(); + } + + // We will only fill fragments for simple memory-describing dbg.value + // intrinsics. If the fragment offset is the same as the offset from the + // base pointer, do The Thing, otherwise fall back to normal dbg.value + // behaviour. AssignmentTrackingLowering has generated DIExpressions + // written in terms of the base pointer. + // TODO: Remove this condition since the fragment offset doesn't always + // equal the offset from base pointer (e.g. for a SROA-split variable). + const auto DerefOffsetInBytes = getDerefOffsetInBytes(DIExpr); + const unsigned Base = + DerefOffsetInBytes && *DerefOffsetInBytes * 8 == StartBit + ? Bases.insert(VarLoc.V) + : 0; + LLVM_DEBUG(dbgs() << "DEF " << DbgVar.getVariable()->getName() << " [" + << StartBit << ", " << EndBit << "): " << toString(Base) + << "\n"); + + // First of all, any locs that use mem that are disrupted need reinstating. + // Unfortunately, IntervalMap doesn't let us insert intervals that overlap + // with existing intervals so this code involves a lot of fiddling around + // with intervals to do that manually. + auto FragIt = LiveSet.find(Var); + + // Check if the variable does not exist in the map. + if (FragIt == LiveSet.end()) { + // Add this variable to the BB map. + auto P = LiveSet.try_emplace(Var, FragsInMemMap(IntervalMapAlloc)); + assert(P.second && "Var already in map?"); + // Add the interval to the fragment map. + P.first->second.insert(StartBit, EndBit, Base); + return; + } + // The variable has an entry in the map. 
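// A standalone sketch (assumed types) of how addDef() above derives the bit
// range affected by a def: the fragment bounds when the expression carries a
// fragment, otherwise the whole variable, [0, size-in-bits).
#include <optional>
#include <utility>

struct SketchFragment {
  unsigned OffsetInBits;
  unsigned SizeInBits;
};

std::pair<unsigned, unsigned>
affectedBits(std::optional<SketchFragment> Frag, unsigned VarSizeInBits) {
  if (Frag)
    return {Frag->OffsetInBits, Frag->OffsetInBits + Frag->SizeInBits};
  return {0u, VarSizeInBits}; // no fragment: the def covers the whole variable
}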
+ + FragsInMemMap &FragMap = FragIt->second; + // First check the easy case: the new fragment `f` doesn't overlap with any + // intervals. + if (!FragMap.overlaps(StartBit, EndBit)) { + LLVM_DEBUG(dbgs() << "- No overlaps\n"); + FragMap.insert(StartBit, EndBit, Base); + return; + } + // There is at least one overlap. + + // Does StartBit intersect an existing fragment? + auto FirstOverlap = FragMap.find(StartBit); + assert(FirstOverlap != FragMap.end()); + bool IntersectStart = FirstOverlap.start() < StartBit; + + // Does EndBit intersect an existing fragment? + auto LastOverlap = FragMap.find(EndBit); + bool IntersectEnd = LastOverlap.valid() && LastOverlap.start() < EndBit; + + // Check if both ends of `f` intersect the same interval `i`. + if (IntersectStart && IntersectEnd && FirstOverlap == LastOverlap) { + LLVM_DEBUG(dbgs() << "- Intersect single interval @ both ends\n"); + // Shorten `i` so that there's space to insert `f`. + // [ f ] + // [ - i - ] + // + + // [ i ][ f ][ i ] + + // Save values for use after inserting a new interval. + auto EndBitOfOverlap = FirstOverlap.stop(); + unsigned OverlapValue = FirstOverlap.value(); + + // Shorten the overlapping interval. + FirstOverlap.setStop(StartBit); + insertMemLoc(BB, Before, Var, FirstOverlap.start(), StartBit, + OverlapValue, VarLoc.DL); + + // Insert a new interval to represent the end part. + FragMap.insert(EndBit, EndBitOfOverlap, OverlapValue); + insertMemLoc(BB, Before, Var, EndBit, EndBitOfOverlap, OverlapValue, + VarLoc.DL); + + // Insert the new (middle) fragment now there is space. + FragMap.insert(StartBit, EndBit, Base); + } else { + // There's an overlap but `f` may not be fully contained within + // `i`. Shorten any end-point intersections so that we can then + // insert `f`. + // [ - f - ] + // [ - i - ] + // | | + // [ i ] + // Shorten any end-point intersections. + if (IntersectStart) { + LLVM_DEBUG(dbgs() << "- Intersect interval at start\n"); + // Split off at the intersection. + FirstOverlap.setStop(StartBit); + insertMemLoc(BB, Before, Var, FirstOverlap.start(), StartBit, + *FirstOverlap, VarLoc.DL); + } + // [ - f - ] + // [ - i - ] + // | | + // [ i ] + if (IntersectEnd) { + LLVM_DEBUG(dbgs() << "- Intersect interval at end\n"); + // Split off at the intersection. + LastOverlap.setStart(EndBit); + insertMemLoc(BB, Before, Var, EndBit, LastOverlap.stop(), *LastOverlap, + VarLoc.DL); + } + + LLVM_DEBUG(dbgs() << "- Erase intervals contained within\n"); + // FirstOverlap and LastOverlap have been shortened such that they're + // no longer overlapping with [StartBit, EndBit). Delete any overlaps + // that remain (these will be fully contained within `f`). + // [ - f - ] } + // [ - i - ] } Intersection shortening that has happened above. + // | | } + // [ i ] } + // ----------------- + // [i2 ] } Intervals fully contained within `f` get erased. + // ----------------- + // [ - f - ][ i ] } Completed insertion. + auto It = FirstOverlap; + if (IntersectStart) + ++It; // IntersectStart: first overlap has been shortened. + while (It.valid() && It.start() >= StartBit && It.stop() <= EndBit) { + LLVM_DEBUG(dbgs() << "- Erase " << toString(It)); + It.erase(); // This increments It after removing the interval. + } + // We've dealt with all the overlaps now! 
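// A standalone sketch of the overlap surgery performed above, using a
// std::map keyed by start bit instead of llvm::IntervalMap. Inserting [S, E)
// trims any interval that crosses S or E and erases intervals wholly inside
// [S, E) before inserting the new one; the real pass additionally emits
// reinstating location defs (insertMemLoc) for each trimmed piece.
#include <map>

struct SketchInterval {
  unsigned End;
  unsigned Value;
};
using SketchFragMap = std::map<unsigned, SketchInterval>; // key = start bit

void insertFragment(SketchFragMap &M, unsigned S, unsigned E, unsigned V) {
  auto It = M.lower_bound(S);
  // Trim an interval that straddles S, keeping its left part.
  if (It != M.begin()) {
    auto Prev = std::prev(It);
    if (Prev->second.End > S) {
      unsigned OldEnd = Prev->second.End, OldVal = Prev->second.Value;
      Prev->second.End = S;
      if (OldEnd > E) // it also straddles E: keep its right part too
        M[E] = {OldEnd, OldVal};
    }
  }
  // Erase intervals starting inside [S, E); trim one that straddles E.
  while (It != M.end() && It->first < E) {
    if (It->second.End > E) {
      M[E] = {It->second.End, It->second.Value};
      It = M.erase(It);
      break;
    }
    It = M.erase(It);
  }
  M[S] = {E, V};
}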
+ assert(!FragMap.overlaps(StartBit, EndBit)); + LLVM_DEBUG(dbgs() << "- Insert DEF into now-empty space\n"); + FragMap.insert(StartBit, EndBit, Base); + } + } + + bool skipVariable(const DILocalVariable *V) { return !V->getSizeInBits(); } + + void process(BasicBlock &BB, VarFragMap &LiveSet) { + BBInsertBeforeMap[&BB].clear(); + for (auto &I : BB) { + if (const auto *Locs = FnVarLocs->getWedge(&I)) { + for (const VarLocInfo &Loc : *Locs) { + addDef(Loc, I, *I.getParent(), LiveSet); + } + } + } + } + +public: + MemLocFragmentFill(Function &Fn, + const DenseSet<DebugAggregate> *VarsWithStackSlot) + : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot) {} + + /// Add variable locations to \p FnVarLocs so that any bits of a variable + /// with a memory location have that location explicitly reinstated at each + /// subsequent variable location definition that that doesn't overwrite those + /// bits. i.e. after a variable location def, insert new defs for the memory + /// location with fragments for the difference of "all bits currently in + /// memory" and "the fragment of the second def". e.g. + /// + /// Before: + /// + /// var x bits 0 to 63: value in memory + /// more instructions + /// var x bits 0 to 31: value is %0 + /// + /// After: + /// + /// var x bits 0 to 63: value in memory + /// more instructions + /// var x bits 0 to 31: value is %0 + /// var x bits 32 to 61: value in memory ; <-- new loc def + /// + void run(FunctionVarLocsBuilder *FnVarLocs) { + if (!EnableMemLocFragFill) + return; + + this->FnVarLocs = FnVarLocs; + + // Prepare for traversal. + // + ReversePostOrderTraversal<Function *> RPOT(&Fn); + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> + Worklist; + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> + Pending; + DenseMap<unsigned int, BasicBlock *> OrderToBB; + DenseMap<BasicBlock *, unsigned int> BBToOrder; + { // Init OrderToBB and BBToOrder. + unsigned int RPONumber = 0; + for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { + OrderToBB[RPONumber] = *RI; + BBToOrder[*RI] = RPONumber; + Worklist.push(RPONumber); + ++RPONumber; + } + LiveIn.init(RPONumber); + LiveOut.init(RPONumber); + } + + // Perform the traversal. + // + // This is a standard "intersect of predecessor outs" dataflow problem. To + // solve it, we perform meet() and process() using the two worklist method + // until the LiveIn data for each block becomes unchanging. + // + // This dataflow is essentially working on maps of sets and at each meet we + // intersect the maps and the mapped sets. So, initialized live-in maps + // monotonically decrease in value throughout the dataflow. + SmallPtrSet<BasicBlock *, 16> Visited; + while (!Worklist.empty() || !Pending.empty()) { + // We track what is on the pending worklist to avoid inserting the same + // thing twice. We could avoid this with a custom priority queue, but + // this is probably not worth it. + SmallPtrSet<BasicBlock *, 16> OnPending; + LLVM_DEBUG(dbgs() << "Processing Worklist\n"); + while (!Worklist.empty()) { + BasicBlock *BB = OrderToBB[Worklist.top()]; + LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n"); + Worklist.pop(); + bool InChanged = meet(*BB, Visited); + // Always consider LiveIn changed on the first visit. + InChanged |= Visited.insert(BB).second; + if (InChanged) { + LLVM_DEBUG(dbgs() + << BB->getName() << " has new InLocs, process it\n"); + // Mutate a copy of LiveIn while processing BB. 
Once we've processed + // the terminator LiveSet is the LiveOut set for BB. + // This is an expensive copy! + VarFragMap LiveSet = LiveIn[BB]; + + // Process the instructions in the block. + process(*BB, LiveSet); + + // Relatively expensive check: has anything changed in LiveOut for BB? + if (!varFragMapsAreEqual(LiveOut[BB], LiveSet)) { + LLVM_DEBUG(dbgs() << BB->getName() + << " has new OutLocs, add succs to worklist: [ "); + LiveOut[BB] = std::move(LiveSet); + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) { + if (OnPending.insert(*I).second) { + LLVM_DEBUG(dbgs() << I->getName() << " "); + Pending.push(BBToOrder[*I]); + } + } + LLVM_DEBUG(dbgs() << "]\n"); + } + } + } + Worklist.swap(Pending); + // At this point, pending must be empty, since it was just the empty + // worklist + assert(Pending.empty() && "Pending should be empty"); + } + + // Insert new location defs. + for (auto Pair : BBInsertBeforeMap) { + InsertMap &Map = Pair.second; + for (auto Pair : Map) { + Instruction *InsertBefore = Pair.first; + assert(InsertBefore && "should never be null"); + auto FragMemLocs = Pair.second; + auto &Ctx = Fn.getContext(); + + for (auto FragMemLoc : FragMemLocs) { + DIExpression *Expr = DIExpression::get(Ctx, std::nullopt); + Expr = *DIExpression::createFragmentExpression( + Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits); + Expr = DIExpression::prepend(Expr, DIExpression::DerefAfter, + FragMemLoc.OffsetInBits / 8); + DebugVariable Var(Aggregates[FragMemLoc.Var].first, Expr, + FragMemLoc.DL.getInlinedAt()); + FnVarLocs->addVarLoc(InsertBefore, Var, Expr, FragMemLoc.DL, + Bases[FragMemLoc.Base]); + } + } + } + } +}; + +/// AssignmentTrackingLowering encapsulates a dataflow analysis over a function +/// that interprets assignment tracking debug info metadata and stores in IR to +/// create a map of variable locations. +class AssignmentTrackingLowering { +public: + /// The kind of location in use for a variable, where Mem is the stack home, + /// Val is an SSA value or const, and None means that there is not one single + /// kind (either because there are multiple or because there is none; it may + /// prove useful to split this into two values in the future). + /// + /// LocKind is a join-semilattice with the partial order: + /// None > Mem, Val + /// + /// i.e. + /// join(Mem, Mem) = Mem + /// join(Val, Val) = Val + /// join(Mem, Val) = None + /// join(None, Mem) = None + /// join(None, Val) = None + /// join(None, None) = None + /// + /// Note: the order is not `None > Val > Mem` because we're using DIAssignID + /// to name assignments and are not tracking the actual stored values. + /// Therefore currently there's no way to ensure that Mem values and Val + /// values are the same. This could be a future extension, though it's not + /// clear that many additional locations would be recovered that way in + /// practice as the likelihood of this sitation arising naturally seems + /// incredibly low. + enum class LocKind { Mem, Val, None }; + + /// An abstraction of the assignment of a value to a variable or memory + /// location. + /// + /// An Assignment is Known or NoneOrPhi. A Known Assignment means we have a + /// DIAssignID ptr that represents it. NoneOrPhi means that we don't (or + /// can't) know the ID of the last assignment that took place. + /// + /// The Status of the Assignment (Known or NoneOrPhi) is another + /// join-semilattice. The partial order is: + /// NoneOrPhi > Known {id_0, id_1, ...id_N} + /// + /// i.e. 
for all values x and y where x != y: + /// join(x, x) = x + /// join(x, y) = NoneOrPhi + struct Assignment { + enum S { Known, NoneOrPhi } Status; + /// ID of the assignment. nullptr if Status is not Known. + DIAssignID *ID; + /// The dbg.assign that marks this dbg-def. Mem-defs don't use this field. + /// May be nullptr. + DbgAssignIntrinsic *Source; + + bool isSameSourceAssignment(const Assignment &Other) const { + // Don't include Source in the equality check. Assignments are + // defined by their ID, not debug intrinsic(s). + return std::tie(Status, ID) == std::tie(Other.Status, Other.ID); + } + void dump(raw_ostream &OS) { + static const char *LUT[] = {"Known", "NoneOrPhi"}; + OS << LUT[Status] << "(id="; + if (ID) + OS << ID; + else + OS << "null"; + OS << ", s="; + if (Source) + OS << *Source; + else + OS << "null"; + OS << ")"; + } + + static Assignment make(DIAssignID *ID, DbgAssignIntrinsic *Source) { + return Assignment(Known, ID, Source); + } + static Assignment makeFromMemDef(DIAssignID *ID) { + return Assignment(Known, ID, nullptr); + } + static Assignment makeNoneOrPhi() { + return Assignment(NoneOrPhi, nullptr, nullptr); + } + // Again, need a Top value? + Assignment() + : Status(NoneOrPhi), ID(nullptr), Source(nullptr) { + } // Can we delete this? + Assignment(S Status, DIAssignID *ID, DbgAssignIntrinsic *Source) + : Status(Status), ID(ID), Source(Source) { + // If the Status is Known then we expect there to be an assignment ID. + assert(Status == NoneOrPhi || ID); + } + }; + + using AssignmentMap = DenseMap<VariableID, Assignment>; + using LocMap = DenseMap<VariableID, LocKind>; + using OverlapMap = DenseMap<VariableID, SmallVector<VariableID, 4>>; + using UntaggedStoreAssignmentMap = + DenseMap<const Instruction *, + SmallVector<std::pair<VariableID, at::AssignmentInfo>>>; + +private: + /// Map a variable to the set of variables that it fully contains. + OverlapMap VarContains; + /// Map untagged stores to the variable fragments they assign to. Used by + /// processUntaggedInstruction. + UntaggedStoreAssignmentMap UntaggedStoreVars; + + // Machinery to defer inserting dbg.values. + using InsertMap = MapVector<Instruction *, SmallVector<VarLocInfo>>; + InsertMap InsertBeforeMap; + /// Clear the location definitions currently cached for insertion after /p + /// After. + void resetInsertionPoint(Instruction &After); + void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source, + Instruction *After); + + static bool mapsAreEqual(const AssignmentMap &A, const AssignmentMap &B) { + if (A.size() != B.size()) + return false; + for (const auto &Pair : A) { + VariableID Var = Pair.first; + const Assignment &AV = Pair.second; + auto R = B.find(Var); + // Check if this entry exists in B, otherwise ret false. + if (R == B.end()) + return false; + // Check that the assignment value is the same. + if (!AV.isSameSourceAssignment(R->second)) + return false; + } + return true; + } + + /// Represents the stack and debug assignments in a block. Used to describe + /// the live-in and live-out values for blocks, as well as the "current" + /// value as we process each instruction in a block. + struct BlockInfo { + /// Dominating assignment to memory for each variable. + AssignmentMap StackHomeValue; + /// Dominating assignemnt to each variable. + AssignmentMap DebugValue; + /// Location kind for each variable. 
LiveLoc indicates whether the + /// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue + /// (LocKind::Val), or neither (LocKind::None) is valid, in that order of + /// preference. This cannot be derived by inspecting DebugValue and + /// StackHomeValue due to the fact that there's no distinction in + /// Assignment (the class) between whether an assignment is unknown or a + /// merge of multiple assignments (both are Status::NoneOrPhi). In other + /// words, the memory location may well be valid while both DebugValue and + /// StackHomeValue contain Assignments that have a Status of NoneOrPhi. + LocMap LiveLoc; + + /// Compare every element in each map to determine structural equality + /// (slow). + bool operator==(const BlockInfo &Other) const { + return LiveLoc == Other.LiveLoc && + mapsAreEqual(StackHomeValue, Other.StackHomeValue) && + mapsAreEqual(DebugValue, Other.DebugValue); + } + bool operator!=(const BlockInfo &Other) const { return !(*this == Other); } + bool isValid() { + return LiveLoc.size() == DebugValue.size() && + LiveLoc.size() == StackHomeValue.size(); + } + }; + + Function &Fn; + const DataLayout &Layout; + const DenseSet<DebugAggregate> *VarsWithStackSlot; + FunctionVarLocsBuilder *FnVarLocs; + DenseMap<const BasicBlock *, BlockInfo> LiveIn; + DenseMap<const BasicBlock *, BlockInfo> LiveOut; + + /// Helper for process methods to track variables touched each frame. + DenseSet<VariableID> VarsTouchedThisFrame; + + /// The set of variables that sometimes are not located in their stack home. + DenseSet<DebugAggregate> NotAlwaysStackHomed; + + VariableID getVariableID(const DebugVariable &Var) { + return static_cast<VariableID>(FnVarLocs->insertVariable(Var)); + } + + /// Join the LiveOut values of preds that are contained in \p Visited into + /// LiveIn[BB]. Return True if LiveIn[BB] has changed as a result. LiveIn[BB] + /// values monotonically increase. See the @link joinMethods join methods + /// @endlink documentation for more info. + bool join(const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited); + ///@name joinMethods + /// Functions that implement `join` (the least upper bound) for the + /// join-semilattice types used in the dataflow. There is an explicit bottom + /// value (⊥) for some types and and explicit top value (⊤) for all types. + /// By definition: + /// + /// Join(A, B) >= A && Join(A, B) >= B + /// Join(A, ⊥) = A + /// Join(A, ⊤) = ⊤ + /// + /// These invariants are important for monotonicity. + /// + /// For the map-type functions, all unmapped keys in an empty map are + /// associated with a bottom value (⊥). This represents their values being + /// unknown. Unmapped keys in non-empty maps (joining two maps with a key + /// only present in one) represents either a variable going out of scope or + /// dropped debug info. It is assumed the key is associated with a top value + /// (⊤) in this case (unknown location / assignment). + ///@{ + static LocKind joinKind(LocKind A, LocKind B); + static LocMap joinLocMap(const LocMap &A, const LocMap &B); + static Assignment joinAssignment(const Assignment &A, const Assignment &B); + static AssignmentMap joinAssignmentMap(const AssignmentMap &A, + const AssignmentMap &B); + static BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B); + ///@} + + /// Process the instructions in \p BB updating \p LiveSet along the way. \p + /// LiveSet must be initialized with the current live-in locations before + /// calling this. 
+  void process(BasicBlock &BB, BlockInfo *LiveSet);
+  ///@name processMethods
+  /// Methods to process instructions in order to update the LiveSet (current
+  /// location information).
+  ///@{
+  void processNonDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+  void processDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+  /// Update \p LiveSet after encountering an instruction with a DIAssignID
+  /// attachment, \p I.
+  void processTaggedInstruction(Instruction &I, BlockInfo *LiveSet);
+  /// Update \p LiveSet after encountering an instruction without a DIAssignID
+  /// attachment, \p I.
+  void processUntaggedInstruction(Instruction &I, BlockInfo *LiveSet);
+  void processDbgAssign(DbgAssignIntrinsic &DAI, BlockInfo *LiveSet);
+  void processDbgValue(DbgValueInst &DVI, BlockInfo *LiveSet);
+  /// Add an assignment to memory for the variable \p Var.
+  void addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
+  /// Add an assignment to the variable \p Var.
+  void addDbgDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
+  ///@}
+
+  /// Set the LocKind for \p Var.
+  void setLocKind(BlockInfo *LiveSet, VariableID Var, LocKind K);
+  /// Get the live LocKind for \p Var. Requires addMemDef or addDbgDef to
+  /// have been called for \p Var first.
+  LocKind getLocKind(BlockInfo *LiveSet, VariableID Var);
+  /// Return true if \p Var has an assignment in \p M matching \p AV.
+  bool hasVarWithAssignment(VariableID Var, const Assignment &AV,
+                            const AssignmentMap &M);
+
+  /// Emit info for variables that are fully promoted.
+  bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs);
+
+public:
+  AssignmentTrackingLowering(Function &Fn, const DataLayout &Layout,
+                             const DenseSet<DebugAggregate> *VarsWithStackSlot)
+      : Fn(Fn), Layout(Layout), VarsWithStackSlot(VarsWithStackSlot) {}
+  /// Run the analysis, adding variable location info to \p FnVarLocs. Returns
+  /// true if any variable locations have been added to FnVarLocs.
+  bool run(FunctionVarLocsBuilder *FnVarLocs);
+};
+} // namespace
+
+void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var,
+                                            LocKind K) {
+  auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) {
+    VarsTouchedThisFrame.insert(Var);
+    LiveSet->LiveLoc[Var] = K;
+  };
+  SetKind(LiveSet, Var, K);
+
+  // Update the LocKind for all fragments contained within Var.
+  for (VariableID Frag : VarContains[Var])
+    SetKind(LiveSet, Frag, K);
+}
+
+AssignmentTrackingLowering::LocKind
+AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) {
+  auto Pair = LiveSet->LiveLoc.find(Var);
+  assert(Pair != LiveSet->LiveLoc.end());
+  return Pair->second;
+}
+
+void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
+                                           const Assignment &AV) {
+  auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
+    LiveSet->StackHomeValue[Var] = AV;
+    // Add default (Var -> ⊤) to DebugValue if Var isn't in DebugValue yet.
+    LiveSet->DebugValue.insert({Var, Assignment::makeNoneOrPhi()});
+    // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
+    // of addMemDef will call setLocKind to override.
+    LiveSet->LiveLoc.insert({Var, LocKind::None});
+  };
+  AddDef(LiveSet, Var, AV);
+
+  // Use this assignment for all fragments contained within Var, but do not
+  // provide a Source because we cannot convert Var's value to a value for the
+  // fragment.
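// A standalone sketch (assumed, simplified types) of the fragment propagation
// pattern shared by setLocKind(), addMemDef() and addDbgDef(): whatever is
// recorded for a variable is also recorded for every fragment variable that
// the overlap map says it fully contains, with the Source dropped because the
// contained fragment's value cannot be recovered from the containing
// variable's value.
#include <map>
#include <vector>

struct SketchAssignment {
  int ID = 0;
  const void *Source = nullptr; // originating dbg.assign, if any
};
using SketchOverlapMap = std::map<int, std::vector<int>>; // var -> contained

void addDefAndFragments(std::map<int, SketchAssignment> &LiveMap,
                        const SketchOverlapMap &Contains, int Var,
                        SketchAssignment AV) {
  LiveMap[Var] = AV;
  SketchAssignment FragAV = AV;
  FragAV.Source = nullptr; // no source can be named for a contained fragment
  auto It = Contains.find(Var);
  if (It == Contains.end())
    return;
  for (int Frag : It->second)
    LiveMap[Frag] = FragAV;
}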
+ Assignment FragAV = AV; + FragAV.Source = nullptr; + for (VariableID Frag : VarContains[Var]) + AddDef(LiveSet, Frag, FragAV); +} + +void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var, + const Assignment &AV) { + auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) { + LiveSet->DebugValue[Var] = AV; + // Add default (Var -> ⊤) to StackHome if Var isn't in StackHome yet. + LiveSet->StackHomeValue.insert({Var, Assignment::makeNoneOrPhi()}); + // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers + // of addDbgDef will call setLocKind to override. + LiveSet->LiveLoc.insert({Var, LocKind::None}); + }; + AddDef(LiveSet, Var, AV); + + // Use this assigment for all fragments contained within Var, but do not + // provide a Source because we cannot convert Var's value to a value for the + // fragment. + Assignment FragAV = AV; + FragAV.Source = nullptr; + for (VariableID Frag : VarContains[Var]) + AddDef(LiveSet, Frag, FragAV); +} + +static DIAssignID *getIDFromInst(const Instruction &I) { + return cast<DIAssignID>(I.getMetadata(LLVMContext::MD_DIAssignID)); +} + +static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) { + return cast<DIAssignID>(DAI.getAssignID()); +} + +/// Return true if \p Var has an assignment in \p M matching \p AV. +bool AssignmentTrackingLowering::hasVarWithAssignment(VariableID Var, + const Assignment &AV, + const AssignmentMap &M) { + auto AssignmentIsMapped = [](VariableID Var, const Assignment &AV, + const AssignmentMap &M) { + auto R = M.find(Var); + if (R == M.end()) + return false; + return AV.isSameSourceAssignment(R->second); + }; + + if (!AssignmentIsMapped(Var, AV, M)) + return false; + + // Check all the frags contained within Var as these will have all been + // mapped to AV at the last store to Var. + for (VariableID Frag : VarContains[Var]) + if (!AssignmentIsMapped(Frag, AV, M)) + return false; + return true; +} + +#ifndef NDEBUG +const char *locStr(AssignmentTrackingLowering::LocKind Loc) { + using LocKind = AssignmentTrackingLowering::LocKind; + switch (Loc) { + case LocKind::Val: + return "Val"; + case LocKind::Mem: + return "Mem"; + case LocKind::None: + return "None"; + }; + llvm_unreachable("unknown LocKind"); +} +#endif + +void AssignmentTrackingLowering::emitDbgValue( + AssignmentTrackingLowering::LocKind Kind, + const DbgVariableIntrinsic *Source, Instruction *After) { + + DILocation *DL = Source->getDebugLoc(); + auto Emit = [this, Source, After, DL](Value *Val, DIExpression *Expr) { + assert(Expr); + if (!Val) + Val = PoisonValue::get(Type::getInt1Ty(Source->getContext())); + + // Find a suitable insert point. + Instruction *InsertBefore = After->getNextNode(); + assert(InsertBefore && "Shouldn't be inserting after a terminator"); + + VariableID Var = getVariableID(DebugVariable(Source)); + VarLocInfo VarLoc; + VarLoc.VariableID = static_cast<VariableID>(Var); + VarLoc.Expr = Expr; + VarLoc.V = Val; + VarLoc.DL = DL; + // Insert it into the map for later. + InsertBeforeMap[InsertBefore].push_back(VarLoc); + }; + + // NOTE: This block can mutate Kind. + if (Kind == LocKind::Mem) { + const auto *DAI = cast<DbgAssignIntrinsic>(Source); + // Check the address hasn't been dropped (e.g. the debug uses may not have + // been replaced before deleting a Value). + if (DAI->isKillAddress()) { + // The address isn't valid so treat this as a non-memory def. 
+ Kind = LocKind::Val; + } else { + Value *Val = DAI->getAddress(); + DIExpression *Expr = DAI->getAddressExpression(); + assert(!Expr->getFragmentInfo() && + "fragment info should be stored in value-expression only"); + // Copy the fragment info over from the value-expression to the new + // DIExpression. + if (auto OptFragInfo = Source->getExpression()->getFragmentInfo()) { + auto FragInfo = *OptFragInfo; + Expr = *DIExpression::createFragmentExpression( + Expr, FragInfo.OffsetInBits, FragInfo.SizeInBits); + } + // The address-expression has an implicit deref, add it now. + std::tie(Val, Expr) = + walkToAllocaAndPrependOffsetDeref(Layout, Val, Expr); + Emit(Val, Expr); + return; + } + } + + if (Kind == LocKind::Val) { + /// Get the value component, converting to Undef if it is variadic. + Value *Val = + Source->hasArgList() ? nullptr : Source->getVariableLocationOp(0); + Emit(Val, Source->getExpression()); + return; + } + + if (Kind == LocKind::None) { + Emit(nullptr, Source->getExpression()); + return; + } +} + +void AssignmentTrackingLowering::processNonDbgInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + if (I.hasMetadata(LLVMContext::MD_DIAssignID)) + processTaggedInstruction(I, LiveSet); + else + processUntaggedInstruction(I, LiveSet); +} + +void AssignmentTrackingLowering::processUntaggedInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + // Interpret stack stores that are not tagged as an assignment in memory for + // the variables associated with that address. These stores may not be tagged + // because a) the store cannot be represented using dbg.assigns (non-const + // length or offset) or b) the tag was accidentally dropped during + // optimisations. For these stores we fall back to assuming that the stack + // home is a valid location for the variables. The benefit is that this + // prevents us missing an assignment and therefore incorrectly maintaining + // earlier location definitions, and in many cases it should be a reasonable + // assumption. However, this will occasionally lead to slight + // inaccuracies. The value of a hoisted untagged store will be visible + // "early", for example. + assert(!I.hasMetadata(LLVMContext::MD_DIAssignID)); + auto It = UntaggedStoreVars.find(&I); + if (It == UntaggedStoreVars.end()) + return; // No variables associated with the store destination. + + LLVM_DEBUG(dbgs() << "processUntaggedInstruction on UNTAGGED INST " << I + << "\n"); + // Iterate over the variables that this store affects, add a NoneOrPhi dbg + // and mem def, set lockind to Mem, and emit a location def for each. + for (auto [Var, Info] : It->second) { + // This instruction is treated as both a debug and memory assignment, + // meaning the memory location should be used. We don't have an assignment + // ID though so use Assignment::makeNoneOrPhi() to create an imaginary one. + addMemDef(LiveSet, Var, Assignment::makeNoneOrPhi()); + addDbgDef(LiveSet, Var, Assignment::makeNoneOrPhi()); + setLocKind(LiveSet, Var, LocKind::Mem); + LLVM_DEBUG(dbgs() << " setting Stack LocKind to: " << locStr(LocKind::Mem) + << "\n"); + // Build the dbg location def to insert. + // + // DIExpression: Add fragment and offset. 
+    DebugVariable V = FnVarLocs->getVariable(Var);
+    DIExpression *DIE = DIExpression::get(I.getContext(), std::nullopt);
+    if (auto Frag = V.getFragment()) {
+      auto R = DIExpression::createFragmentExpression(DIE, Frag->OffsetInBits,
+                                                      Frag->SizeInBits);
+      assert(R && "unexpected createFragmentExpression failure");
+      DIE = *R;
+    }
+    SmallVector<uint64_t, 3> Ops;
+    if (Info.OffsetInBits)
+      Ops = {dwarf::DW_OP_plus_uconst, Info.OffsetInBits / 8};
+    Ops.push_back(dwarf::DW_OP_deref);
+    DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false,
+                                       /*EntryValue=*/false);
+    // Find a suitable insert point.
+    Instruction *InsertBefore = I.getNextNode();
+    assert(InsertBefore && "Shouldn't be inserting after a terminator");
+
+    // Get DILocation for this unrecorded assignment.
+    DILocation *InlinedAt = const_cast<DILocation *>(V.getInlinedAt());
+    const DILocation *DILoc = DILocation::get(
+        Fn.getContext(), 0, 0, V.getVariable()->getScope(), InlinedAt);
+
+    VarLocInfo VarLoc;
+    VarLoc.VariableID = static_cast<VariableID>(Var);
+    VarLoc.Expr = DIE;
+    VarLoc.V = const_cast<AllocaInst *>(Info.Base);
+    VarLoc.DL = DILoc;
+    // Insert it into the map for later.
+    InsertBeforeMap[InsertBefore].push_back(VarLoc);
+  }
+}
+
+void AssignmentTrackingLowering::processTaggedInstruction(
+    Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+  auto Linked = at::getAssignmentMarkers(&I);
+  // No dbg.assign intrinsics linked.
+  // FIXME: All vars that have a stack slot this store modifies that don't have
+  // a dbg.assign linked to it should probably treat this like an untagged
+  // store.
+  if (Linked.empty())
+    return;
+
+  LLVM_DEBUG(dbgs() << "processTaggedInstruction on " << I << "\n");
+  for (DbgAssignIntrinsic *DAI : Linked) {
+    VariableID Var = getVariableID(DebugVariable(DAI));
+    // Something has gone wrong if VarsWithStackSlot doesn't contain a variable
+    // that is linked to a store.
+    assert(VarsWithStackSlot->count(getAggregate(DAI)) &&
+           "expected DAI's variable to have stack slot");
+
+    Assignment AV = Assignment::makeFromMemDef(getIDFromInst(I));
+    addMemDef(LiveSet, Var, AV);
+
+    LLVM_DEBUG(dbgs() << " linked to " << *DAI << "\n");
+    LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+                      << " -> ");
+
+    // The last assignment to the stack is now AV. Check if the last debug
+    // assignment has a matching Assignment.
+    if (hasVarWithAssignment(Var, AV, LiveSet->DebugValue)) {
+      // The StackHomeValue and DebugValue for this variable match so we can
+      // emit a stack home location here.
+      LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
+      LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n");
+      LLVM_DEBUG(dbgs() << " Debug val: ";
+                 LiveSet->DebugValue[Var].dump(dbgs()); dbgs() << "\n");
+      setLocKind(LiveSet, Var, LocKind::Mem);
+      emitDbgValue(LocKind::Mem, DAI, &I);
+      continue;
+    }
+
+    // The StackHomeValue and DebugValue for this variable do not match, i.e.
+    // the value currently stored in the stack is not what we'd expect to
+    // see, so we cannot emit a stack home location here. Now we will
+    // look at the live LocKind for the variable and determine an appropriate
+    // dbg.value to emit.
+    LocKind PrevLoc = getLocKind(LiveSet, Var);
+    switch (PrevLoc) {
+    case LocKind::Val: {
+      // The value in memory has changed but we're not currently
+      // using the memory location. Do nothing.
+      LLVM_DEBUG(dbgs() << "Val, (unchanged)\n";);
+      setLocKind(LiveSet, Var, LocKind::Val);
+    } break;
+    case LocKind::Mem: {
+      // There's been an assignment to memory that we were using as a
+      // location for this variable, and the Assignment doesn't match what
+      // we'd expect to see in memory.
+      if (LiveSet->DebugValue[Var].Status == Assignment::NoneOrPhi) {
+        // We need to terminate any previously open location now.
+        LLVM_DEBUG(dbgs() << "None, No Debug value available\n";);
+        setLocKind(LiveSet, Var, LocKind::None);
+        emitDbgValue(LocKind::None, DAI, &I);
+      } else {
+        // The previous DebugValue Value can be used here.
+        LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";);
+        setLocKind(LiveSet, Var, LocKind::Val);
+        Assignment PrevAV = LiveSet->DebugValue.lookup(Var);
+        if (PrevAV.Source) {
+          emitDbgValue(LocKind::Val, PrevAV.Source, &I);
+        } else {
+          // PrevAV.Source is nullptr so we must emit undef here.
+          emitDbgValue(LocKind::None, DAI, &I);
+        }
+      }
+    } break;
+    case LocKind::None: {
+      // There's been an assignment to memory and we currently are
+      // not tracking a location for the variable. Do not emit anything.
+      LLVM_DEBUG(dbgs() << "None, (unchanged)\n";);
+      setLocKind(LiveSet, Var, LocKind::None);
+    } break;
+    }
+  }
+}
+
+void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI,
+                                                  BlockInfo *LiveSet) {
+  // Only bother tracking variables that are at some point stack homed. Other
+  // variables can be dealt with trivially later.
+  if (!VarsWithStackSlot->count(getAggregate(&DAI)))
+    return;
+
+  VariableID Var = getVariableID(DebugVariable(&DAI));
+  Assignment AV = Assignment::make(getIDFromMarker(DAI), &DAI);
+  addDbgDef(LiveSet, Var, AV);
+
+  LLVM_DEBUG(dbgs() << "processDbgAssign on " << DAI << "\n";);
+  LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+                    << " -> ");
+
+  // Check if the DebugValue and StackHomeValue both hold the same
+  // Assignment.
+  if (hasVarWithAssignment(Var, AV, LiveSet->StackHomeValue)) {
+    // They match. We can use the stack home because the debug intrinsics state
+    // that an assignment happened here, and we know that specific assignment
+    // was the last one to take place in memory for this variable.
+    LocKind Kind;
+    if (DAI.isKillAddress()) {
+      LLVM_DEBUG(
+          dbgs()
+          << "Val, Stack matches Debug program but address is killed\n";);
+      Kind = LocKind::Val;
+    } else {
+      LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
+      Kind = LocKind::Mem;
+    }
+    setLocKind(LiveSet, Var, Kind);
+    emitDbgValue(Kind, &DAI, &DAI);
+  } else {
+    // The last assignment to the memory location isn't the one that we want to
+    // show to the user so emit a dbg.value(Value). Value may be undef.
+    LLVM_DEBUG(dbgs() << "Val, Stack contents is unknown\n";);
+    setLocKind(LiveSet, Var, LocKind::Val);
+    emitDbgValue(LocKind::Val, &DAI, &DAI);
+  }
+}
+
+void AssignmentTrackingLowering::processDbgValue(DbgValueInst &DVI,
+                                                 BlockInfo *LiveSet) {
+  // Only bother tracking variables that are at some point stack homed. Other
+  // variables can be dealt with trivially later.
+  if (!VarsWithStackSlot->count(getAggregate(&DVI)))
+    return;
+
+  VariableID Var = getVariableID(DebugVariable(&DVI));
+  // We have no ID to create an Assignment with so we mark this assignment as
+  // NoneOrPhi. Note that the dbg.value still exists, we just cannot determine
+  // the assignment responsible for setting this value.
+ // This is fine; dbg.values are essentially interchangable with unlinked + // dbg.assigns, and some passes such as mem2reg and instcombine add them to + // PHIs for promoted variables. + Assignment AV = Assignment::makeNoneOrPhi(); + addDbgDef(LiveSet, Var, AV); + + LLVM_DEBUG(dbgs() << "processDbgValue on " << DVI << "\n";); + LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var)) + << " -> Val, dbg.value override"); + + setLocKind(LiveSet, Var, LocKind::Val); + emitDbgValue(LocKind::Val, &DVI, &DVI); +} + +void AssignmentTrackingLowering::processDbgInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + assert(!isa<DbgAddrIntrinsic>(&I) && "unexpected dbg.addr"); + if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I)) + processDbgAssign(*DAI, LiveSet); + else if (auto *DVI = dyn_cast<DbgValueInst>(&I)) + processDbgValue(*DVI, LiveSet); +} + +void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) { + assert(!After.isTerminator() && "Can't insert after a terminator"); + auto R = InsertBeforeMap.find(After.getNextNode()); + if (R == InsertBeforeMap.end()) + return; + R->second.clear(); +} + +void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { + for (auto II = BB.begin(), EI = BB.end(); II != EI;) { + assert(VarsTouchedThisFrame.empty()); + // Process the instructions in "frames". A "frame" includes a single + // non-debug instruction followed any debug instructions before the + // next non-debug instruction. + if (!isa<DbgInfoIntrinsic>(&*II)) { + if (II->isTerminator()) + break; + resetInsertionPoint(*II); + processNonDbgInstruction(*II, LiveSet); + assert(LiveSet->isValid()); + ++II; + } + while (II != EI) { + if (!isa<DbgInfoIntrinsic>(&*II)) + break; + resetInsertionPoint(*II); + processDbgInstruction(*II, LiveSet); + assert(LiveSet->isValid()); + ++II; + } + + // We've processed everything in the "frame". Now determine which variables + // cannot be represented by a dbg.declare. + for (auto Var : VarsTouchedThisFrame) { + LocKind Loc = getLocKind(LiveSet, Var); + // If a variable's LocKind is anything other than LocKind::Mem then we + // must note that it cannot be represented with a dbg.declare. + // Note that this check is enough without having to check the result of + // joins() because for join to produce anything other than Mem after + // we've already seen a Mem we'd be joining None or Val with Mem. In that + // case, we've already hit this codepath when we set the LocKind to Val + // or None in that block. + if (Loc != LocKind::Mem) { + DebugVariable DbgVar = FnVarLocs->getVariable(Var); + DebugAggregate Aggr{DbgVar.getVariable(), DbgVar.getInlinedAt()}; + NotAlwaysStackHomed.insert(Aggr); + } + } + VarsTouchedThisFrame.clear(); + } +} + +AssignmentTrackingLowering::LocKind +AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) { + // Partial order: + // None > Mem, Val + return A == B ? A : LocKind::None; +} + +AssignmentTrackingLowering::LocMap +AssignmentTrackingLowering::joinLocMap(const LocMap &A, const LocMap &B) { + // Join A and B. + // + // U = join(a, b) for a in A, b in B where Var(a) == Var(b) + // D = join(x, ⊤) for x where Var(x) is in A xor B + // Join = U ∪ D + // + // This is achieved by performing a join on elements from A and B with + // variables common to both A and B (join elements indexed by var intersect), + // then adding LocKind::None elements for vars in A xor B. 
The latter part is + // equivalent to performing join on elements with variables in A xor B with + // LocKind::None (⊤) since join(x, ⊤) = ⊤. + LocMap Join; + SmallVector<VariableID, 16> SymmetricDifference; + // Insert the join of the elements with common vars into Join. Add the + // remaining elements to into SymmetricDifference. + for (const auto &[Var, Loc] : A) { + // If this Var doesn't exist in B then add it to the symmetric difference + // set. + auto R = B.find(Var); + if (R == B.end()) { + SymmetricDifference.push_back(Var); + continue; + } + // There is an entry for Var in both, join it. + Join[Var] = joinKind(Loc, R->second); + } + unsigned IntersectSize = Join.size(); + (void)IntersectSize; + + // Add the elements in B with variables that are not in A into + // SymmetricDifference. + for (const auto &Pair : B) { + VariableID Var = Pair.first; + if (A.count(Var) == 0) + SymmetricDifference.push_back(Var); + } + + // Add SymmetricDifference elements to Join and return the result. + for (const auto &Var : SymmetricDifference) + Join.insert({Var, LocKind::None}); + + assert(Join.size() == (IntersectSize + SymmetricDifference.size())); + assert(Join.size() >= A.size() && Join.size() >= B.size()); + return Join; +} + +AssignmentTrackingLowering::Assignment +AssignmentTrackingLowering::joinAssignment(const Assignment &A, + const Assignment &B) { + // Partial order: + // NoneOrPhi(null, null) > Known(v, ?s) + + // If either are NoneOrPhi the join is NoneOrPhi. + // If either value is different then the result is + // NoneOrPhi (joining two values is a Phi). + if (!A.isSameSourceAssignment(B)) + return Assignment::makeNoneOrPhi(); + if (A.Status == Assignment::NoneOrPhi) + return Assignment::makeNoneOrPhi(); + + // Source is used to lookup the value + expression in the debug program if + // the stack slot gets assigned a value earlier than expected. Because + // we're only tracking the one dbg.assign, we can't capture debug PHIs. + // It's unlikely that we're losing out on much coverage by avoiding that + // extra work. + // The Source may differ in this situation: + // Pred.1: + // dbg.assign i32 0, ..., !1, ... + // Pred.2: + // dbg.assign i32 1, ..., !1, ... + // Here the same assignment (!1) was performed in both preds in the source, + // but we can't use either one unless they are identical (e.g. .we don't + // want to arbitrarily pick between constant values). + auto JoinSource = [&]() -> DbgAssignIntrinsic * { + if (A.Source == B.Source) + return A.Source; + if (A.Source == nullptr || B.Source == nullptr) + return nullptr; + if (A.Source->isIdenticalTo(B.Source)) + return A.Source; + return nullptr; + }; + DbgAssignIntrinsic *Source = JoinSource(); + assert(A.Status == B.Status && A.Status == Assignment::Known); + assert(A.ID == B.ID); + return Assignment::make(A.ID, Source); +} + +AssignmentTrackingLowering::AssignmentMap +AssignmentTrackingLowering::joinAssignmentMap(const AssignmentMap &A, + const AssignmentMap &B) { + // Join A and B. + // + // U = join(a, b) for a in A, b in B where Var(a) == Var(b) + // D = join(x, ⊤) for x where Var(x) is in A xor B + // Join = U ∪ D + // + // This is achieved by performing a join on elements from A and B with + // variables common to both A and B (join elements indexed by var intersect), + // then adding LocKind::None elements for vars in A xor B. The latter part is + // equivalent to performing join on elements with variables in A xor B with + // Status::NoneOrPhi (⊤) since join(x, ⊤) = ⊤. 
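// A standalone sketch (plain std::map, int stand-ins) of the map-join shape
// shared by joinLocMap() and joinAssignmentMap() above: keys present in both
// maps take the element-wise join, and keys present in only one side join
// with the implicit top value (⊤) and therefore map to Top in the result.
#include <map>

template <typename JoinFn>
std::map<int, int> joinMaps(const std::map<int, int> &A,
                            const std::map<int, int> &B, JoinFn Join,
                            int Top) {
  std::map<int, int> Result;
  for (const auto &[Key, AValue] : A) {
    auto BIt = B.find(Key);
    Result[Key] = (BIt == B.end()) ? Top : Join(AValue, BIt->second);
  }
  for (const auto &P : B)
    if (!A.count(P.first))
      Result[P.first] = Top; // key only in B joins with ⊤
  return Result;
}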
+ AssignmentMap Join; + SmallVector<VariableID, 16> SymmetricDifference; + // Insert the join of the elements with common vars into Join. Add the + // remaining elements to into SymmetricDifference. + for (const auto &[Var, AV] : A) { + // If this Var doesn't exist in B then add it to the symmetric difference + // set. + auto R = B.find(Var); + if (R == B.end()) { + SymmetricDifference.push_back(Var); + continue; + } + // There is an entry for Var in both, join it. + Join[Var] = joinAssignment(AV, R->second); + } + unsigned IntersectSize = Join.size(); + (void)IntersectSize; + + // Add the elements in B with variables that are not in A into + // SymmetricDifference. + for (const auto &Pair : B) { + VariableID Var = Pair.first; + if (A.count(Var) == 0) + SymmetricDifference.push_back(Var); + } + + // Add SymmetricDifference elements to Join and return the result. + for (auto Var : SymmetricDifference) + Join.insert({Var, Assignment::makeNoneOrPhi()}); + + assert(Join.size() == (IntersectSize + SymmetricDifference.size())); + assert(Join.size() >= A.size() && Join.size() >= B.size()); + return Join; +} + +AssignmentTrackingLowering::BlockInfo +AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A, + const BlockInfo &B) { + BlockInfo Join; + Join.LiveLoc = joinLocMap(A.LiveLoc, B.LiveLoc); + Join.StackHomeValue = joinAssignmentMap(A.StackHomeValue, B.StackHomeValue); + Join.DebugValue = joinAssignmentMap(A.DebugValue, B.DebugValue); + assert(Join.isValid()); + return Join; +} + +bool AssignmentTrackingLowering::join( + const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited) { + BlockInfo BBLiveIn; + bool FirstJoin = true; + // LiveIn locs for BB is the join of the already-processed preds' LiveOut + // locs. + for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) { + // Ignore backedges if we have not visited the predecessor yet. As the + // predecessor hasn't yet had locations propagated into it, most locations + // will not yet be valid, so treat them as all being uninitialized and + // potentially valid. If a location guessed to be correct here is + // invalidated later, we will remove it when we revisit this block. This + // is essentially the same as initialising all LocKinds and Assignments to + // an implicit ⊥ value which is the identity value for the join operation. + const BasicBlock *Pred = *I; + if (!Visited.count(Pred)) + continue; + + auto PredLiveOut = LiveOut.find(Pred); + // Pred must have been processed already. See comment at start of this loop. + assert(PredLiveOut != LiveOut.end()); + + // Perform the join of BBLiveIn (current live-in info) and PrevLiveOut. + if (FirstJoin) + BBLiveIn = PredLiveOut->second; + else + BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second); + FirstJoin = false; + } + + auto CurrentLiveInEntry = LiveIn.find(&BB); + // Check if there isn't an entry, or there is but the LiveIn set has changed + // (expensive check). + if (CurrentLiveInEntry == LiveIn.end() || + BBLiveIn != CurrentLiveInEntry->second) { + LiveIn[&BB] = std::move(BBLiveIn); + // A change has occured. + return true; + } + // No change. + return false; +} + +/// Return true if A fully contains B. 
+static bool fullyContains(DIExpression::FragmentInfo A, + DIExpression::FragmentInfo B) { + auto ALeft = A.OffsetInBits; + auto BLeft = B.OffsetInBits; + if (BLeft < ALeft) + return false; + + auto ARight = ALeft + A.SizeInBits; + auto BRight = BLeft + B.SizeInBits; + if (BRight > ARight) + return false; + return true; +} + +static std::optional<at::AssignmentInfo> +getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) { + // Don't bother checking if this is an AllocaInst. We know this + // instruction has no tag which means there are no variables associated + // with it. + if (const auto *SI = dyn_cast<StoreInst>(&I)) + return at::getAssignmentInfo(Layout, SI); + if (const auto *MI = dyn_cast<MemIntrinsic>(&I)) + return at::getAssignmentInfo(Layout, MI); + // Alloca or non-store-like inst. + return std::nullopt; +} + +/// Build a map of {Variable x: Variables y} where all variable fragments +/// contained within the variable fragment x are in set y. This means that +/// y does not contain all overlaps because partial overlaps are excluded. +/// +/// While we're iterating over the function, add single location defs for +/// dbg.declares to \p FnVarLocs +/// +/// Finally, populate UntaggedStoreVars with a mapping of untagged stores to +/// the stored-to variable fragments. +/// +/// These tasks are bundled together to reduce the number of times we need +/// to iterate over the function as they can be achieved together in one pass. +static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( + Function &Fn, FunctionVarLocsBuilder *FnVarLocs, + AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars) { + DenseSet<DebugVariable> Seen; + // Map of Variable: [Fragments]. + DenseMap<DebugAggregate, SmallVector<DebugVariable, 8>> FragmentMap; + // Iterate over all instructions: + // - dbg.declare -> add single location variable record + // - dbg.* -> Add fragments to FragmentMap + // - untagged store -> Add fragments to FragmentMap and update + // UntaggedStoreVars. + // We need to add fragments for untagged stores too so that we can correctly + // clobber overlapped fragment locations later. + for (auto &BB : Fn) { + for (auto &I : BB) { + if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) { + FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(), + DDI->getDebugLoc(), DDI->getAddress()); + } else if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) { + DebugVariable DV = DebugVariable(DII); + DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()}; + if (Seen.insert(DV).second) + FragmentMap[DA].push_back(DV); + } else if (auto Info = getUntaggedStoreAssignmentInfo( + I, Fn.getParent()->getDataLayout())) { + // Find markers linked to this alloca. + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(Info->Base)) { + // Discard the fragment if it covers the entire variable. 
+ std::optional<DIExpression::FragmentInfo> FragInfo = + [&Info, DAI]() -> std::optional<DIExpression::FragmentInfo> { + DIExpression::FragmentInfo F; + F.OffsetInBits = Info->OffsetInBits; + F.SizeInBits = Info->SizeInBits; + if (auto ExistingFrag = DAI->getExpression()->getFragmentInfo()) + F.OffsetInBits += ExistingFrag->OffsetInBits; + if (auto Sz = DAI->getVariable()->getSizeInBits()) { + if (F.OffsetInBits == 0 && F.SizeInBits == *Sz) + return std::nullopt; + } + return F; + }(); + + DebugVariable DV = DebugVariable(DAI->getVariable(), FragInfo, + DAI->getDebugLoc().getInlinedAt()); + DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()}; + + // Cache this info for later. + UntaggedStoreVars[&I].push_back( + {FnVarLocs->insertVariable(DV), *Info}); + + if (Seen.insert(DV).second) + FragmentMap[DA].push_back(DV); + } + } + } + } + + // Sort the fragment map for each DebugAggregate in non-descending + // order of fragment size. Assert no entries are duplicates. + for (auto &Pair : FragmentMap) { + SmallVector<DebugVariable, 8> &Frags = Pair.second; + std::sort( + Frags.begin(), Frags.end(), [](DebugVariable Next, DebugVariable Elmt) { + assert(!(Elmt.getFragmentOrDefault() == Next.getFragmentOrDefault())); + return Elmt.getFragmentOrDefault().SizeInBits > + Next.getFragmentOrDefault().SizeInBits; + }); + } + + // Build the map. + AssignmentTrackingLowering::OverlapMap Map; + for (auto Pair : FragmentMap) { + auto &Frags = Pair.second; + for (auto It = Frags.begin(), IEnd = Frags.end(); It != IEnd; ++It) { + DIExpression::FragmentInfo Frag = It->getFragmentOrDefault(); + // Find the frags that this is contained within. + // + // Because Frags is sorted by size and none have the same offset and + // size, we know that this frag can only be contained by subsequent + // elements. + SmallVector<DebugVariable, 8>::iterator OtherIt = It; + ++OtherIt; + VariableID ThisVar = FnVarLocs->insertVariable(*It); + for (; OtherIt != IEnd; ++OtherIt) { + DIExpression::FragmentInfo OtherFrag = OtherIt->getFragmentOrDefault(); + VariableID OtherVar = FnVarLocs->insertVariable(*OtherIt); + if (fullyContains(OtherFrag, Frag)) + Map[OtherVar].push_back(ThisVar); + } + } + } + + return Map; +} + +bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) { + if (Fn.size() > MaxNumBlocks) { + LLVM_DEBUG(dbgs() << "[AT] Dropping var locs in: " << Fn.getName() + << ": too many blocks (" << Fn.size() << ")\n"); + at::deleteAll(&Fn); + return false; + } + + FnVarLocs = FnVarLocsBuilder; + + // The general structure here is inspired by VarLocBasedImpl.cpp + // (LiveDebugValues). + + // Build the variable fragment overlap map. + // Note that this pass doesn't handle partial overlaps correctly (FWIW + // neither does LiveDebugVariables) because that is difficult to do and + // appears to be rare occurance. + VarContains = + buildOverlapMapAndRecordDeclares(Fn, FnVarLocs, UntaggedStoreVars); + + // Prepare for traversal. + ReversePostOrderTraversal<Function *> RPOT(&Fn); + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> + Worklist; + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> + Pending; + DenseMap<unsigned int, BasicBlock *> OrderToBB; + DenseMap<BasicBlock *, unsigned int> BBToOrder; + { // Init OrderToBB and BBToOrder. 
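The second half of buildOverlapMapAndRecordDeclares sorts each variable's fragments by size so that a fragment can only be contained by a later, larger element, which keeps the containment search to one forward scan per position. A simplified stand-alone version of that step (Fragment is just an offset/size pair here, and partial overlaps are ignored exactly as in the pass):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Fragment { uint64_t OffsetInBits, SizeInBits; };

    static bool fullyContains(Fragment A, Fragment B) { // true if A contains B
      return B.OffsetInBits >= A.OffsetInBits &&
             B.OffsetInBits + B.SizeInBits <= A.OffsetInBits + A.SizeInBits;
    }

    // Return (container, contained) pairs.  Sorting by size first means a
    // fragment can only be contained by a later element, so one forward scan
    // from each position is enough.
    static std::vector<std::pair<Fragment, Fragment>>
    findContainments(std::vector<Fragment> Frags) {
      std::sort(Frags.begin(), Frags.end(), [](Fragment L, Fragment R) {
        return L.SizeInBits < R.SizeInBits;
      });
      std::vector<std::pair<Fragment, Fragment>> Pairs;
      for (size_t I = 0; I < Frags.size(); ++I)
        for (size_t J = I + 1; J < Frags.size(); ++J)
          if (fullyContains(Frags[J], Frags[I]))
            Pairs.push_back({Frags[J], Frags[I]});
      return Pairs;
    }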
+ unsigned int RPONumber = 0; + for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { + OrderToBB[RPONumber] = *RI; + BBToOrder[*RI] = RPONumber; + Worklist.push(RPONumber); + ++RPONumber; + } + LiveIn.init(RPONumber); + LiveOut.init(RPONumber); + } + + // Perform the traversal. + // + // This is a standard "union of predecessor outs" dataflow problem. To solve + // it, we perform join() and process() using the two worklist method until + // the LiveIn data for each block becomes unchanging. The "proof" that this + // terminates can be put together by looking at the comments around LocKind, + // Assignment, and the various join methods, which show that all the elements + // involved are made up of join-semilattices; LiveIn(n) can only + // monotonically increase in value throughout the dataflow. + // + SmallPtrSet<BasicBlock *, 16> Visited; + while (!Worklist.empty()) { + // We track what is on the pending worklist to avoid inserting the same + // thing twice. + SmallPtrSet<BasicBlock *, 16> OnPending; + LLVM_DEBUG(dbgs() << "Processing Worklist\n"); + while (!Worklist.empty()) { + BasicBlock *BB = OrderToBB[Worklist.top()]; + LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n"); + Worklist.pop(); + bool InChanged = join(*BB, Visited); + // Always consider LiveIn changed on the first visit. + InChanged |= Visited.insert(BB).second; + if (InChanged) { + LLVM_DEBUG(dbgs() << BB->getName() << " has new InLocs, process it\n"); + // Mutate a copy of LiveIn while processing BB. After calling process + // LiveSet is the LiveOut set for BB. + BlockInfo LiveSet = LiveIn[BB]; + + // Process the instructions in the block. + process(*BB, &LiveSet); + + // Relatively expensive check: has anything changed in LiveOut for BB? + if (LiveOut[BB] != LiveSet) { + LLVM_DEBUG(dbgs() << BB->getName() + << " has new OutLocs, add succs to worklist: [ "); + LiveOut[BB] = std::move(LiveSet); + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) { + if (OnPending.insert(*I).second) { + LLVM_DEBUG(dbgs() << I->getName() << " "); + Pending.push(BBToOrder[*I]); + } + } + LLVM_DEBUG(dbgs() << "]\n"); + } + } + } + Worklist.swap(Pending); + // At this point, pending must be empty, since it was just the empty + // worklist + assert(Pending.empty() && "Pending should be empty"); + } + + // That's the hard part over. Now we just have some admin to do. + + // Record whether we inserted any intrinsics. + bool InsertedAnyIntrinsics = false; + + // Identify and add defs for single location variables. + // + // Go through all of the defs that we plan to add. If the aggregate variable + // it's a part of is not in the NotAlwaysStackHomed set we can emit a single + // location def and omit the rest. Add an entry to AlwaysStackHomed so that + // we can identify those uneeded defs later. + DenseSet<DebugAggregate> AlwaysStackHomed; + for (const auto &Pair : InsertBeforeMap) { + const auto &Vec = Pair.second; + for (VarLocInfo VarLoc : Vec) { + DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID); + DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()}; + + // Skip this Var if it's not always stack homed. + if (NotAlwaysStackHomed.contains(Aggr)) + continue; + + // Skip complex cases such as when different fragments of a variable have + // been split into different allocas. Skipping in this case means falling + // back to using a list of defs (which could reduce coverage, but is no + // less correct). 
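The traversal above is the usual two-worklist scheme: drain a worklist ordered by reverse post-order, collect successors whose live-out changed into a pending queue, then swap. A stripped-down generic driver with the CFG and the transfer functions passed in as callbacks (names are illustrative only):

    #include <functional>
    #include <queue>
    #include <set>
    #include <vector>

    // Generic two-worklist dataflow driver over blocks 0..N-1, numbered in
    // reverse post-order.  `join` computes a block's live-in from its
    // predecessors' live-outs and returns true if it changed; `process`
    // transfers live-in to live-out and returns true if the live-out changed.
    static void solve(unsigned N,
                      const std::vector<std::vector<unsigned>> &Succs,
                      std::function<bool(unsigned)> join,
                      std::function<bool(unsigned)> process) {
      using MinQueue = std::priority_queue<unsigned, std::vector<unsigned>,
                                           std::greater<unsigned>>;
      MinQueue Worklist, Pending;
      for (unsigned BB = 0; BB < N; ++BB)
        Worklist.push(BB);

      std::set<unsigned> Visited;
      while (!Worklist.empty()) {
        std::set<unsigned> OnPending; // avoid queueing a block twice per round
        while (!Worklist.empty()) {
          unsigned BB = Worklist.top();
          Worklist.pop();
          bool InChanged = join(BB);
          InChanged |= Visited.insert(BB).second; // first visit is a change
          if (InChanged && process(BB))
            for (unsigned S : Succs[BB])
              if (OnPending.insert(S).second)
                Pending.push(S);
        }
        Worklist.swap(Pending);
      }
    }

Ordering both queues by RPO number means a block is normally visited after its non-backedge predecessors, which keeps the number of re-visits low.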
+ bool Simple = + VarLoc.Expr->getNumElements() == 1 && VarLoc.Expr->startsWithDeref(); + if (!Simple) { + NotAlwaysStackHomed.insert(Aggr); + continue; + } + + // All source assignments to this variable remain and all stores to any + // part of the variable store to the same address (with varying + // offsets). We can just emit a single location for the whole variable. + // + // Unless we've already done so, create the single location def now. + if (AlwaysStackHomed.insert(Aggr).second) { + assert(isa<AllocaInst>(VarLoc.V)); + // TODO: When more complex cases are handled VarLoc.Expr should be + // built appropriately rather than always using an empty DIExpression. + // The assert below is a reminder. + assert(Simple); + VarLoc.Expr = DIExpression::get(Fn.getContext(), std::nullopt); + DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID); + FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.V); + InsertedAnyIntrinsics = true; + } + } + } + + // Insert the other DEFs. + for (const auto &[InsertBefore, Vec] : InsertBeforeMap) { + SmallVector<VarLocInfo> NewDefs; + for (const VarLocInfo &VarLoc : Vec) { + DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID); + DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()}; + // If this variable is always stack homed then we have already inserted a + // dbg.declare and deleted this dbg.value. + if (AlwaysStackHomed.contains(Aggr)) + continue; + NewDefs.push_back(VarLoc); + InsertedAnyIntrinsics = true; + } + + FnVarLocs->setWedge(InsertBefore, std::move(NewDefs)); + } + + InsertedAnyIntrinsics |= emitPromotedVarLocs(FnVarLocs); + + return InsertedAnyIntrinsics; +} + +bool AssignmentTrackingLowering::emitPromotedVarLocs( + FunctionVarLocsBuilder *FnVarLocs) { + bool InsertedAnyIntrinsics = false; + // Go through every block, translating debug intrinsics for fully promoted + // variables into FnVarLocs location defs. No analysis required for these. + for (auto &BB : Fn) { + for (auto &I : BB) { + // Skip instructions other than dbg.values and dbg.assigns. + auto *DVI = dyn_cast<DbgValueInst>(&I); + if (!DVI) + continue; + // Skip variables that haven't been promoted - we've dealt with those + // already. + if (VarsWithStackSlot->contains(getAggregate(DVI))) + continue; + // Wrapper to get a single value (or undef) from DVI. + auto GetValue = [DVI]() -> Value * { + // We can't handle variadic DIExpressions yet so treat those as + // kill locations. + if (DVI->isKillLocation() || DVI->getValue() == nullptr || + DVI->hasArgList()) + return PoisonValue::get(Type::getInt32Ty(DVI->getContext())); + return DVI->getValue(); + }; + Instruction *InsertBefore = I.getNextNode(); + assert(InsertBefore && "Unexpected: debug intrinsics after a terminator"); + FnVarLocs->addVarLoc(InsertBefore, DebugVariable(DVI), + DVI->getExpression(), DVI->getDebugLoc(), + GetValue()); + InsertedAnyIntrinsics = true; + } + } + return InsertedAnyIntrinsics; +} + +/// Remove redundant definitions within sequences of consecutive location defs. +/// This is done using a backward scan to keep the last def describing a +/// specific variable/fragment. +/// +/// This implements removeRedundantDbgInstrsUsingBackwardScan from +/// lib/Transforms/Utils/BasicBlockUtils.cpp for locations described with +/// FunctionVarLocsBuilder instead of with intrinsics. 
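A stand-alone sketch of the backward pruning described above, operating on a plain vector of (variable, location) pairs instead of a wedge of VarLocInfos; the real scan additionally resets its seen-set whenever a non-debug instruction separates two wedges:

    #include <algorithm>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    // One wedge of location defs as (variable, location) pairs.  Scanning the
    // wedge backwards, only the first def seen per variable (i.e. the last one
    // in program order) is kept; earlier defs of the same variable are dead.
    using Def = std::pair<std::string, int>;

    static bool pruneWedgeBackward(std::vector<Def> &Wedge) {
      std::set<std::string> Seen;
      std::vector<Def> KeptReversed;
      for (auto It = Wedge.rbegin(); It != Wedge.rend(); ++It)
        if (Seen.insert(It->first).second)
          KeptReversed.push_back(*It); // last def of this variable: keep it
      if (KeptReversed.size() == Wedge.size())
        return false; // nothing was redundant
      std::reverse(KeptReversed.begin(), KeptReversed.end());
      Wedge = std::move(KeptReversed);
      return true;
    }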
+static bool +removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB, + FunctionVarLocsBuilder &FnVarLocs) { + bool Changed = false; + SmallDenseSet<DebugVariable> VariableSet; + + // Scan over the entire block, not just over the instructions mapped by + // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // instructions. + for (const Instruction &I : reverse(*BB)) { + if (!isa<DbgVariableIntrinsic>(I)) { + // Sequence of consecutive defs ended. Clear map for the next one. + VariableSet.clear(); + } + + // Get the location defs that start just before this instruction. + const auto *Locs = FnVarLocs.getWedge(&I); + if (!Locs) + continue; + + NumWedgesScanned++; + bool ChangedThisWedge = false; + // The new pruned set of defs, reversed because we're scanning backwards. + SmallVector<VarLocInfo> NewDefsReversed; + + // Iterate over the existing defs in reverse. + for (auto RIt = Locs->rbegin(), REnd = Locs->rend(); RIt != REnd; ++RIt) { + NumDefsScanned++; + const DebugVariable &Key = FnVarLocs.getVariable(RIt->VariableID); + bool FirstDefOfFragment = VariableSet.insert(Key).second; + + // If the same variable fragment is described more than once it is enough + // to keep the last one (i.e. the first found in this reverse iteration). + if (FirstDefOfFragment) { + // New def found: keep it. + NewDefsReversed.push_back(*RIt); + } else { + // Redundant def found: throw it away. Since the wedge of defs is being + // rebuilt, doing nothing is the same as deleting an entry. + ChangedThisWedge = true; + NumDefsRemoved++; + } + continue; + } + + // Un-reverse the defs and replace the wedge with the pruned version. + if (ChangedThisWedge) { + std::reverse(NewDefsReversed.begin(), NewDefsReversed.end()); + FnVarLocs.setWedge(&I, std::move(NewDefsReversed)); + NumWedgesChanged++; + Changed = true; + } + } + + return Changed; +} + +/// Remove redundant location defs using a forward scan. This can remove a +/// location definition that is redundant due to indicating that a variable has +/// the same value as is already being indicated by an earlier def. +/// +/// This implements removeRedundantDbgInstrsUsingForwardScan from +/// lib/Transforms/Utils/BasicBlockUtils.cpp for locations described with +/// FunctionVarLocsBuilder instead of with intrinsics +static bool +removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB, + FunctionVarLocsBuilder &FnVarLocs) { + bool Changed = false; + DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap; + + // Scan over the entire block, not just over the instructions mapped by + // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // instructions. + for (const Instruction &I : *BB) { + // Get the defs that come just before this instruction. + const auto *Locs = FnVarLocs.getWedge(&I); + if (!Locs) + continue; + + NumWedgesScanned++; + bool ChangedThisWedge = false; + // The new pruned set of defs. + SmallVector<VarLocInfo> NewDefs; + + // Iterate over the existing defs. + for (const VarLocInfo &Loc : *Locs) { + NumDefsScanned++; + DebugVariable Key(FnVarLocs.getVariable(Loc.VariableID).getVariable(), + std::nullopt, Loc.DL.getInlinedAt()); + auto VMI = VariableMap.find(Key); + + // Update the map if we found a new value/expression describing the + // variable, or if the variable wasn't mapped already. 
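The forward direction instead remembers the last (value, expression) seen for each variable and drops any def that merely restates it. A simplified version, with the map persisting across the wedges of a block as it does in the pass:

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    struct LocDef { std::string Var; int Value; int Expr; };

    // A def is redundant if the variable is already known to have exactly the
    // same (value, expression) location.  `Known` persists across the wedges
    // of a block, mirroring the VariableMap above.
    static bool
    pruneWedgeForward(std::vector<LocDef> &Wedge,
                      std::map<std::string, std::pair<int, int>> &Known) {
      std::vector<LocDef> Kept;
      for (const LocDef &D : Wedge) {
        auto It = Known.find(D.Var);
        if (It != Known.end() && It->second == std::make_pair(D.Value, D.Expr))
          continue; // restates the current location: drop it
        Known[D.Var] = {D.Value, D.Expr};
        Kept.push_back(D);
      }
      if (Kept.size() == Wedge.size())
        return false;
      Wedge = std::move(Kept);
      return true;
    }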
+ if (VMI == VariableMap.end() || VMI->second.first != Loc.V || + VMI->second.second != Loc.Expr) { + VariableMap[Key] = {Loc.V, Loc.Expr}; + NewDefs.push_back(Loc); + continue; + } + + // Did not insert this Loc, which is the same as removing it. + ChangedThisWedge = true; + NumDefsRemoved++; + } + + // Replace the existing wedge with the pruned version. + if (ChangedThisWedge) { + FnVarLocs.setWedge(&I, std::move(NewDefs)); + NumWedgesChanged++; + Changed = true; + } + } + + return Changed; +} + +static bool +removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB, + FunctionVarLocsBuilder &FnVarLocs) { + assert(BB->isEntryBlock()); + // Do extra work to ensure that we remove semantically unimportant undefs. + // + // This is to work around the fact that SelectionDAG will hoist dbg.values + // using argument values to the top of the entry block. That can move arg + // dbg.values before undef and constant dbg.values which they previously + // followed. The easiest thing to do is to just try to feed SelectionDAG + // input it's happy with. + // + // Map of {Variable x: Fragments y} where the fragments y of variable x have + // have at least one non-undef location defined already. Don't use directly, + // instead call DefineBits and HasDefinedBits. + SmallDenseMap<DebugAggregate, SmallDenseSet<DIExpression::FragmentInfo>> + VarsWithDef; + // Specify that V (a fragment of A) has a non-undef location. + auto DefineBits = [&VarsWithDef](DebugAggregate A, DebugVariable V) { + VarsWithDef[A].insert(V.getFragmentOrDefault()); + }; + // Return true if a non-undef location has been defined for V (a fragment of + // A). Doesn't imply that the location is currently non-undef, just that a + // non-undef location has been seen previously. + auto HasDefinedBits = [&VarsWithDef](DebugAggregate A, DebugVariable V) { + auto FragsIt = VarsWithDef.find(A); + if (FragsIt == VarsWithDef.end()) + return false; + return llvm::any_of(FragsIt->second, [V](auto Frag) { + return DIExpression::fragmentsOverlap(Frag, V.getFragmentOrDefault()); + }); + }; + + bool Changed = false; + DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap; + + // Scan over the entire block, not just over the instructions mapped by + // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // instructions. + for (const Instruction &I : *BB) { + // Get the defs that come just before this instruction. + const auto *Locs = FnVarLocs.getWedge(&I); + if (!Locs) + continue; + + NumWedgesScanned++; + bool ChangedThisWedge = false; + // The new pruned set of defs. + SmallVector<VarLocInfo> NewDefs; + + // Iterate over the existing defs. + for (const VarLocInfo &Loc : *Locs) { + NumDefsScanned++; + DebugAggregate Aggr{FnVarLocs.getVariable(Loc.VariableID).getVariable(), + Loc.DL.getInlinedAt()}; + DebugVariable Var = FnVarLocs.getVariable(Loc.VariableID); + + // Remove undef entries that are encountered before any non-undef + // intrinsics from the entry block. + if (isa<UndefValue>(Loc.V) && !HasDefinedBits(Aggr, Var)) { + // Did not insert this Loc, which is the same as removing it. + NumDefsRemoved++; + ChangedThisWedge = true; + continue; + } + + DefineBits(Aggr, Var); + NewDefs.push_back(Loc); + } + + // Replace the existing wedge with the pruned version. 
+ if (ChangedThisWedge) { + FnVarLocs.setWedge(&I, std::move(NewDefs)); + NumWedgesChanged++; + Changed = true; + } + } + + return Changed; +} + +static bool removeRedundantDbgLocs(const BasicBlock *BB, + FunctionVarLocsBuilder &FnVarLocs) { + bool MadeChanges = false; + MadeChanges |= removeRedundantDbgLocsUsingBackwardScan(BB, FnVarLocs); + if (BB->isEntryBlock()) + MadeChanges |= removeUndefDbgLocsFromEntryBlock(BB, FnVarLocs); + MadeChanges |= removeRedundantDbgLocsUsingForwardScan(BB, FnVarLocs); + + if (MadeChanges) + LLVM_DEBUG(dbgs() << "Removed redundant dbg locs from: " << BB->getName() + << "\n"); + return MadeChanges; +} + +static DenseSet<DebugAggregate> findVarsWithStackSlot(Function &Fn) { + DenseSet<DebugAggregate> Result; + for (auto &BB : Fn) { + for (auto &I : BB) { + // Any variable linked to an instruction is considered + // interesting. Ideally we only need to check Allocas, however, a + // DIAssignID might get dropped from an alloca but not stores. In that + // case, we need to consider the variable interesting for NFC behaviour + // with this change. TODO: Consider only looking at allocas. + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(&I)) { + Result.insert({DAI->getVariable(), DAI->getDebugLoc().getInlinedAt()}); + } + } + } + return Result; +} + +static void analyzeFunction(Function &Fn, const DataLayout &Layout, + FunctionVarLocsBuilder *FnVarLocs) { + // The analysis will generate location definitions for all variables, but we + // only need to perform a dataflow on the set of variables which have a stack + // slot. Find those now. + DenseSet<DebugAggregate> VarsWithStackSlot = findVarsWithStackSlot(Fn); + + bool Changed = false; + + // Use a scope block to clean up AssignmentTrackingLowering before running + // MemLocFragmentFill to reduce peak memory consumption. + { + AssignmentTrackingLowering Pass(Fn, Layout, &VarsWithStackSlot); + Changed = Pass.run(FnVarLocs); + } + + if (Changed) { + MemLocFragmentFill Pass(Fn, &VarsWithStackSlot); + Pass.run(FnVarLocs); + + // Remove redundant entries. As well as reducing memory consumption and + // avoiding waiting cycles later by burning some now, this has another + // important job. That is to work around some SelectionDAG quirks. See + // removeRedundantDbgLocsUsingForwardScan comments for more info on that. + for (auto &BB : Fn) + removeRedundantDbgLocs(&BB, *FnVarLocs); + } +} + +bool AssignmentTrackingAnalysis::runOnFunction(Function &F) { + if (!isAssignmentTrackingEnabled(*F.getParent())) + return false; + + LLVM_DEBUG(dbgs() << "AssignmentTrackingAnalysis run on " << F.getName() + << "\n"); + auto DL = std::make_unique<DataLayout>(F.getParent()); + + // Clear previous results. + Results->clear(); + + FunctionVarLocsBuilder Builder; + analyzeFunction(F, *DL.get(), &Builder); + + // Save these results. + Results->init(Builder); + + if (PrintResults && isFunctionInPrintList(F.getName())) + Results->print(errs(), F); + + // Return false because this pass does not modify the function. 
+ return false; +} + +AssignmentTrackingAnalysis::AssignmentTrackingAnalysis() + : FunctionPass(ID), Results(std::make_unique<FunctionVarLocs>()) {} + +char AssignmentTrackingAnalysis::ID = 0; + +INITIALIZE_PASS(AssignmentTrackingAnalysis, DEBUG_TYPE, + "Assignment Tracking Analysis", false, true) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index ad51bab8f30b..8f71ec2b490c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -60,6 +61,7 @@ namespace { class AtomicExpand : public FunctionPass { const TargetLowering *TLI = nullptr; + const DataLayout *DL = nullptr; public: static char ID; // Pass identification, replacement for typeid @@ -83,13 +85,13 @@ private: bool tryExpandAtomicRMW(AtomicRMWInst *AI); AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); Value * - insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); - void - expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr, - Align AddrAlign, AtomicOrdering MemOpOrder, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); + function_ref<Value *(IRBuilderBase &, Value *)> PerformOp); + void expandAtomicOpToLLSC( + Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilderBase &, Value *)> PerformOp); void expandPartwordAtomicRMW( AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); @@ -98,12 +100,11 @@ private: void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); - static Value * - insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr, - Align AddrAlign, AtomicOrdering MemOpOrder, - SyncScope::ID SSID, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, - CreateCmpXchgInstFun CreateCmpXchg); + static Value *insertRMWCmpXchgLoop( + IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, + function_ref<Value *(IRBuilderBase &, Value *)> PerformOp, + CreateCmpXchgInstFun CreateCmpXchg); bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -125,6 +126,16 @@ private: CreateCmpXchgInstFun CreateCmpXchg); }; +// IRBuilder to be used for replacement atomic instructions. +struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> { + // Preserves the DebugLoc from I, and preserves still valid metadata. 
+ explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL) + : IRBuilder(I->getContext(), DL) { + SetInsertPoint(I); + this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections}); + } +}; + } // end anonymous namespace char AtomicExpand::ID = 0; @@ -174,9 +185,11 @@ bool AtomicExpand::runOnFunction(Function &F) { return false; auto &TM = TPC->getTM<TargetMachine>(); - if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) + const auto *Subtarget = TM.getSubtargetImpl(F); + if (!Subtarget->enableAtomicExpand()) return false; - TLI = TM.getSubtargetImpl(F)->getTargetLowering(); + TLI = Subtarget->getTargetLowering(); + DL = &F.getParent()->getDataLayout(); SmallVector<Instruction *, 1> AtomicInsts; @@ -221,6 +234,31 @@ bool AtomicExpand::runOnFunction(Function &F) { } } + if (LI && TLI->shouldCastAtomicLoadInIR(LI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { + I = LI = convertAtomicLoadToIntegerType(LI); + MadeChange = true; + } else if (SI && + TLI->shouldCastAtomicStoreInIR(SI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { + I = SI = convertAtomicStoreToIntegerType(SI); + MadeChange = true; + } else if (RMWI && + TLI->shouldCastAtomicRMWIInIR(RMWI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { + I = RMWI = convertAtomicXchgToIntegerType(RMWI); + MadeChange = true; + } else if (CASI) { + // TODO: when we're ready to make the change at the IR level, we can + // extend convertCmpXchgToInteger for floating point too. + if (CASI->getCompareOperand()->getType()->isPointerTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. + I = CASI = convertCmpXchgToIntegerType(CASI); + MadeChange = true; + } + } + if (TLI->shouldInsertFencesForAtomic(I)) { auto FenceOrdering = AtomicOrdering::Monotonic; if (LI && isAcquireOrStronger(LI->getOrdering())) { @@ -251,33 +289,31 @@ bool AtomicExpand::runOnFunction(Function &F) { if (FenceOrdering != AtomicOrdering::Monotonic) { MadeChange |= bracketInstWithFences(I, FenceOrdering); } - } - - if (LI) { - if (TLI->shouldCastAtomicLoadInIR(LI) == - TargetLoweringBase::AtomicExpansionKind::CastToInteger) { - // TODO: add a TLI hook to control this so that each target can - // convert to lowering the original type one at a time. - LI = convertAtomicLoadToIntegerType(LI); - assert(LI->getType()->isIntegerTy() && "invariant broken"); + } else if (I->hasAtomicStore() && + TLI->shouldInsertTrailingFenceForAtomicStore(I)) { + auto FenceOrdering = AtomicOrdering::Monotonic; + if (SI) + FenceOrdering = SI->getOrdering(); + else if (RMWI) + FenceOrdering = RMWI->getOrdering(); + else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) != + TargetLoweringBase::AtomicExpansionKind::LLSC) + // LLSC is handled in expandAtomicCmpXchg(). + FenceOrdering = CASI->getSuccessOrdering(); + + IRBuilder Builder(I); + if (auto TrailingFence = + TLI->emitTrailingFence(Builder, I, FenceOrdering)) { + TrailingFence->moveAfter(I); MadeChange = true; } + } + if (LI) MadeChange |= tryExpandAtomicLoad(LI); - } else if (SI) { - if (TLI->shouldCastAtomicStoreInIR(SI) == - TargetLoweringBase::AtomicExpansionKind::CastToInteger) { - // TODO: add a TLI hook to control this so that each target can - // convert to lowering the original type one at a time. 
- SI = convertAtomicStoreToIntegerType(SI); - assert(SI->getValueOperand()->getType()->isIntegerTy() && - "invariant broken"); - MadeChange = true; - } - - if (tryExpandAtomicStore(SI)) - MadeChange = true; - } else if (RMWI) { + else if (SI) + MadeChange |= tryExpandAtomicStore(SI); + else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise @@ -287,15 +323,6 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } else { AtomicRMWInst::BinOp Op = RMWI->getOperation(); - if (TLI->shouldCastAtomicRMWIInIR(RMWI) == - TargetLoweringBase::AtomicExpansionKind::CastToInteger) { - // TODO: add a TLI hook to control this so that each target can - // convert to lowering the original type one at a time. - RMWI = convertAtomicXchgToIntegerType(RMWI); - assert(RMWI->getValOperand()->getType()->isIntegerTy() && - "invariant broken"); - MadeChange = true; - } unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(RMWI); if (ValueSize < MinCASSize && @@ -307,28 +334,14 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange |= tryExpandAtomicRMW(RMWI); } - } else if (CASI) { - // TODO: when we're ready to make the change at the IR level, we can - // extend convertCmpXchgToInteger for floating point too. - assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && - "unimplemented - floating point not legal at IR level"); - if (CASI->getCompareOperand()->getType()->isPointerTy()) { - // TODO: add a TLI hook to control this so that each target can - // convert to lowering the original type one at a time. - CASI = convertCmpXchgToIntegerType(CASI); - assert(CASI->getCompareOperand()->getType()->isIntegerTy() && - "invariant broken"); - MadeChange = true; - } - + } else if (CASI) MadeChange |= tryExpandAtomicCmpXchg(CASI); - } } return MadeChange; } bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { - IRBuilder<> Builder(I); + ReplacementIRBuilder Builder(I, *DL); auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); @@ -357,7 +370,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); - IRBuilder<> Builder(LI); + ReplacementIRBuilder Builder(LI, *DL); Value *Addr = LI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); @@ -381,7 +394,7 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { Type *NewTy = getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); - IRBuilder<> Builder(RMWI); + ReplacementIRBuilder Builder(RMWI, *DL); Value *Addr = RMWI->getPointerOperand(); Value *Val = RMWI->getValOperand(); @@ -413,7 +426,7 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { expandAtomicOpToLLSC( LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), LI->getOrdering(), - [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); + [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; }); return true; case TargetLoweringBase::AtomicExpansionKind::LLOnly: return expandAtomicLoadToLL(LI); @@ -443,7 +456,7 @@ bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { - IRBuilder<> Builder(LI); + ReplacementIRBuilder Builder(LI, *DL); // On some architectures, load-linked instructions are atomic for larger // sizes than normal loads. 
For example, the only 64-bit load guaranteed @@ -459,7 +472,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { } bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { - IRBuilder<> Builder(LI); + ReplacementIRBuilder Builder(LI, *DL); AtomicOrdering Order = LI->getOrdering(); if (Order == AtomicOrdering::Unordered) Order = AtomicOrdering::Monotonic; @@ -488,7 +501,7 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { /// mechanism, we convert back to the old format which the backends understand. /// Each backend will need individual work to recognize the new format. StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { - IRBuilder<> Builder(SI); + ReplacementIRBuilder Builder(SI, *DL); auto *M = SI->getModule(); Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), M->getDataLayout()); @@ -514,7 +527,7 @@ void AtomicExpand::expandAtomicStore(StoreInst *SI) { // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. - IRBuilder<> Builder(SI); + ReplacementIRBuilder Builder(SI, *DL); AtomicOrdering Ordering = SI->getOrdering(); assert(Ordering != AtomicOrdering::NotAtomic); AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered @@ -529,7 +542,7 @@ void AtomicExpand::expandAtomicStore(StoreInst *SI) { tryExpandAtomicRMW(AI); } -static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, +static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { @@ -569,7 +582,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::LLSC); } else { - auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { + auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) { return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, AI->getValOperand()); }; @@ -582,10 +595,6 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { - // TODO: Handle atomicrmw fadd/fsub - if (AI->getType()->isFloatingPointTy()) - return false; - expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::CmpXChg); } else { @@ -613,8 +622,15 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { TLI->emitBitTestAtomicRMWIntrinsic(AI); return true; } + case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: { + TLI->emitCmpArithAtomicRMWIntrinsic(AI); + return true; + } case TargetLoweringBase::AtomicExpansionKind::NotAtomic: return lowerAtomicRMWInst(AI); + case TargetLoweringBase::AtomicExpansionKind::Expand: + TLI->emitExpandAtomicRMW(AI); + return true; default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } @@ -626,6 +642,7 @@ struct PartwordMaskValues { // These three fields are guaranteed to be set by createMaskInstrs. Type *WordType = nullptr; Type *ValueType = nullptr; + Type *IntValueType = nullptr; Value *AlignedAddr = nullptr; Align AlignedAddrAlignment; // The remaining fields can be null. @@ -679,9 +696,9 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { /// include only the part that would've been loaded from Addr. /// /// Inv_Mask: The inverse of Mask. 
-static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, - Type *ValueType, Value *Addr, - Align AddrAlign, +static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, + Instruction *I, Type *ValueType, + Value *Addr, Align AddrAlign, unsigned MinWordSize) { PartwordMaskValues PMV; @@ -690,7 +707,11 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, const DataLayout &DL = M->getDataLayout(); unsigned ValueSize = DL.getTypeStoreSize(ValueType); - PMV.ValueType = ValueType; + PMV.ValueType = PMV.IntValueType = ValueType; + if (PMV.ValueType->isFloatingPointTy()) + PMV.IntValueType = + Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits()); + PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) : ValueType; if (PMV.ValueType == PMV.WordType) { @@ -701,19 +722,29 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, return PMV; } + PMV.AlignedAddrAlignment = Align(MinWordSize); + assert(ValueSize < MinWordSize); - Type *WordPtrType = - PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + PointerType *PtrTy = cast<PointerType>(Addr->getType()); + Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace()); + IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace()); + Value *PtrLSB; - // TODO: we could skip some of this if AddrAlign >= MinWordSize. - Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); - PMV.AlignedAddr = Builder.CreateIntToPtr( - Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, - "AlignedAddr"); - PMV.AlignedAddrAlignment = Align(MinWordSize); + if (AddrAlign < MinWordSize) { + PMV.AlignedAddr = Builder.CreateIntrinsic( + Intrinsic::ptrmask, {PtrTy, IntTy}, + {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr, + "AlignedAddr"); + + Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy); + PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); + } else { + // If the alignment is high enough, the LSB are known 0. + PMV.AlignedAddr = Addr; + PtrLSB = ConstantInt::getNullValue(IntTy); + } - Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); @@ -727,28 +758,36 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, PMV.Mask = Builder.CreateShl( ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, "Mask"); + PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); + + // Cast for typed pointers. 
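In plain terms, createMaskInstrs rounds the address down to the atomic word size and turns the discarded low bits into a shift amount and mask that select the subword within the wide word. A stand-alone sketch of that arithmetic (a hypothetical helper, assuming the value is naturally aligned and strictly narrower than the word):

    #include <cstdint>

    struct PartwordMask {
      uintptr_t AlignedAddr; // address rounded down to the word boundary
      unsigned ShiftAmt;     // bit offset of the value within the wide word
      uint64_t Mask;         // selects the value's bits within the wide word
      uint64_t InvMask;      // everything except the value's bits
    };

    // WordSize and ValueSize are in bytes, ValueSize < WordSize <= 8, and the
    // value is assumed to be naturally aligned within the word.
    static PartwordMask makePartwordMask(uintptr_t Addr, unsigned WordSize,
                                         unsigned ValueSize, bool LittleEndian) {
      PartwordMask P;
      P.AlignedAddr = Addr & ~(uintptr_t)(WordSize - 1);
      unsigned PtrLSB = Addr & (WordSize - 1); // byte offset inside the word
      P.ShiftAmt = (LittleEndian ? PtrLSB : WordSize - ValueSize - PtrLSB) * 8;
      P.Mask = ((uint64_t(1) << (ValueSize * 8)) - 1) << P.ShiftAmt;
      P.InvMask = ~P.Mask;
      return P;
    }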
+ PMV.AlignedAddr = + Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr"); + return PMV; } -static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, +static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV) { assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); if (PMV.WordType == PMV.ValueType) return WideWord; Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); - Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); - return Trunc; + Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted"); + return Builder.CreateBitCast(Trunc, PMV.ValueType); } -static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, +static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV) { assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); if (PMV.WordType == PMV.ValueType) return Updated; + Updated = Builder.CreateBitCast(Updated, PMV.IntValueType); + Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); Value *Shift = Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); @@ -761,7 +800,7 @@ static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, /// operation. (That is, only the bits under the Mask should be /// affected by the operation) static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, - IRBuilder<> &Builder, Value *Loaded, + IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV) { // TODO: update to use @@ -790,10 +829,16 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: - case AtomicRMWInst::UMin: { - // Finally, comparison ops will operate on the full value, so - // truncate down to the original size, and expand out again after - // doing the operation. + case AtomicRMWInst::UMin: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: + case AtomicRMWInst::FMin: + case AtomicRMWInst::FMax: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: { + // Finally, other ops will operate on the full value, so truncate down to + // the original size, and expand out again after doing the + // operation. Bitcasts will be inserted for FP values. 
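extractMaskedValue and insertMaskedValue then reduce to shift-and-mask operations on the wide word; for floating-point payloads the pass only wraps the same operations in bitcasts. The integer case in portable C++:

    #include <cstdint>

    // Pull a narrow value out of the wide word it lives in, and splice an
    // updated narrow value back in, leaving the other bits untouched.
    static uint64_t extractSubword(uint64_t WideWord, unsigned ShiftAmt,
                                   uint64_t Mask) {
      return (WideWord & Mask) >> ShiftAmt;
    }

    static uint64_t insertSubword(uint64_t WideWord, uint64_t Updated,
                                  unsigned ShiftAmt, uint64_t Mask) {
      return (WideWord & ~Mask) | ((Updated << ShiftAmt) & Mask);
    }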
Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc); Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); @@ -816,17 +861,23 @@ void AtomicExpand::expandPartwordAtomicRMW( AtomicOrdering MemOpOrder = AI->getOrdering(); SyncScope::ID SSID = AI->getSyncScopeID(); - IRBuilder<> Builder(AI); + ReplacementIRBuilder Builder(AI, *DL); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); - Value *ValOperand_Shifted = - Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), - PMV.ShiftAmt, "ValOperand_Shifted"); + Value *ValOperand_Shifted = nullptr; + if (AI->getOperation() == AtomicRMWInst::Xchg || + AI->getOperation() == AtomicRMWInst::Add || + AI->getOperation() == AtomicRMWInst::Sub || + AI->getOperation() == AtomicRMWInst::Nand) { + ValOperand_Shifted = + Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), + PMV.ShiftAmt, "ValOperand_Shifted"); + } - auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) { + auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) { return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded, ValOperand_Shifted, AI->getValOperand(), PMV); }; @@ -850,7 +901,7 @@ void AtomicExpand::expandPartwordAtomicRMW( // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { - IRBuilder<> Builder(AI); + ReplacementIRBuilder Builder(AI, *DL); AtomicRMWInst::BinOp Op = AI->getOperation(); assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || @@ -925,7 +976,7 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { BasicBlock *BB = CI->getParent(); Function *F = BB->getParent(); - IRBuilder<> Builder(CI); + ReplacementIRBuilder Builder(CI, *DL); LLVMContext &Ctx = Builder.getContext(); BasicBlock *EndBB = @@ -999,7 +1050,7 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(CI); Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); - Value *Res = UndefValue::get(CI->getType()); + Value *Res = PoisonValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Res = Builder.CreateInsertValue(Res, Success, 1); @@ -1011,8 +1062,8 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { void AtomicExpand::expandAtomicOpToLLSC( Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { - IRBuilder<> Builder(I); + function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { + ReplacementIRBuilder Builder(I, *DL); Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, MemOpOrder, PerformOp); @@ -1021,7 +1072,7 @@ void AtomicExpand::expandAtomicOpToLLSC( } void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { - IRBuilder<> Builder(AI); + ReplacementIRBuilder Builder(AI, *DL); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), @@ -1047,7 +1098,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { } void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { - IRBuilder<> Builder(CI); + ReplacementIRBuilder Builder(CI, *DL); PartwordMaskValues PMV = createMaskInstrs( Builder, CI, CI->getCompareOperand()->getType(), 
CI->getPointerOperand(), @@ -1063,7 +1114,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, CI->getMergedOrdering()); Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); - Value *Res = UndefValue::get(CI->getType()); + Value *Res = PoisonValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Value *Success = Builder.CreateICmpEQ( CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); @@ -1074,9 +1125,9 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { } Value *AtomicExpand::insertRMWLLSCLoop( - IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, + IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { + function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); @@ -1134,7 +1185,7 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), M->getDataLayout()); - IRBuilder<> Builder(CI); + ReplacementIRBuilder Builder(CI, *DL); Value *Addr = CI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); @@ -1155,7 +1206,7 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType()); - Value *Res = UndefValue::get(CI->getType()); + Value *Res = PoisonValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, OldVal, 0); Res = Builder.CreateInsertValue(Res, Succ, 1); @@ -1258,8 +1309,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); - // This grabs the DebugLoc from CI - IRBuilder<> Builder(CI); + ReplacementIRBuilder Builder(CI, *DL); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove @@ -1326,7 +1376,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Make sure later instructions don't get reordered with a fence if // necessary. Builder.SetInsertPoint(SuccessBB); - if (ShouldInsertFencesForAtomic) + if (ShouldInsertFencesForAtomic || + TLI->shouldInsertTrailingFenceForAtomicStore(CI)) TLI->emitTrailingFence(Builder, CI, SuccessOrder); Builder.CreateBr(ExitBB); @@ -1400,7 +1451,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Some use of the full struct return that we don't understand has happened, // so we've got to reconstruct it properly. 
Value *Res; - Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0); Res = Builder.CreateInsertValue(Res, Success, 1); CI->replaceAllUsesWith(Res); @@ -1439,9 +1490,9 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { } Value *AtomicExpand::insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, + IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, + function_ref<Value *(IRBuilderBase &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); @@ -1524,11 +1575,11 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Note: This function is exposed externally by AtomicExpandUtils.h bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { - IRBuilder<> Builder(AI); + ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout()); Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), AI->getOrdering(), AI->getSyncScopeID(), - [&](IRBuilder<> &Builder, Value *Loaded) { + [&](IRBuilderBase &Builder, Value *Loaded) { return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, AI->getValOperand()); }, @@ -1634,19 +1685,19 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::BAD_BINOP: llvm_unreachable("Should not have BAD_BINOP."); case AtomicRMWInst::Xchg: - return makeArrayRef(LibcallsXchg); + return ArrayRef(LibcallsXchg); case AtomicRMWInst::Add: - return makeArrayRef(LibcallsAdd); + return ArrayRef(LibcallsAdd); case AtomicRMWInst::Sub: - return makeArrayRef(LibcallsSub); + return ArrayRef(LibcallsSub); case AtomicRMWInst::And: - return makeArrayRef(LibcallsAnd); + return ArrayRef(LibcallsAnd); case AtomicRMWInst::Or: - return makeArrayRef(LibcallsOr); + return ArrayRef(LibcallsOr); case AtomicRMWInst::Xor: - return makeArrayRef(LibcallsXor); + return ArrayRef(LibcallsXor); case AtomicRMWInst::Nand: - return makeArrayRef(LibcallsNand); + return ArrayRef(LibcallsNand); case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: @@ -1655,6 +1706,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::FMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: // No atomic libcalls are available for max/min/umax/umin. return {}; } @@ -1678,7 +1731,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { // CAS libcall, via a CAS loop, instead. if (!Success) { expandAtomicRMWToCmpXchg( - I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded, + I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { // Create the CAS instruction normally... 
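insertRMWCmpXchgLoop emits the classic load, compute, compare-and-exchange retry loop. The same shape in portable C++, applied here to a made-up saturating unsigned add on std::atomic purely for illustration:

    #include <atomic>
    #include <cstdint>

    // Read-modify-write built from a weak compare-exchange loop: load the
    // current value, compute the new one, and retry until no other thread has
    // written in between.  The operation (saturating unsigned add) stands in
    // for the expanded atomicrmw's "PerformOp" step.
    static uint32_t atomicSaturatingAdd(std::atomic<uint32_t> &A, uint32_t Inc) {
      uint32_t Loaded = A.load(std::memory_order_relaxed);
      uint32_t NewVal;
      do {
        NewVal = (Loaded > UINT32_MAX - Inc) ? UINT32_MAX : Loaded + Inc;
      } while (!A.compare_exchange_weak(Loaded, NewVal,
                                        std::memory_order_seq_cst,
                                        std::memory_order_relaxed));
      return Loaded; // an atomicrmw yields the old value
    }

On failure, compare_exchange_weak reloads the current value into Loaded, so the loop recomputes from fresh data until no other thread intervenes, which is the same behaviour as the emitted IR loop built around cmpxchg.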
@@ -1893,7 +1946,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // The final result from the CAS is {load of 'expected' alloca, bool result // from call} Type *FinalResultTy = I->getType(); - Value *V = UndefValue::get(FinalResultTy); + Value *V = PoisonValue::get(FinalResultTy); Value *ExpectedOut = Builder.CreateAlignedLoad( CASExpected->getType(), AllocaCASExpected, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp index 958212a0e448..e7e73606de07 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -68,17 +68,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" +#include <optional> using namespace llvm; @@ -130,9 +130,9 @@ INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare", // This function updates and optimizes the branching instructions of every basic // block in a given function to account for changes in the layout. -static void updateBranches( - MachineFunction &MF, - const SmallVector<MachineBasicBlock *, 4> &PreLayoutFallThroughs) { +static void +updateBranches(MachineFunction &MF, + const SmallVector<MachineBasicBlock *> &PreLayoutFallThroughs) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SmallVector<MachineOperand, 4> Cond; for (auto &MBB : MF) { @@ -167,7 +167,7 @@ static void updateBranches( bool getBBClusterInfoForFunction( const MachineFunction &MF, BasicBlockSectionsProfileReader *BBSectionsProfileReader, - std::vector<Optional<BBClusterInfo>> &V) { + DenseMap<unsigned, BBClusterInfo> &V) { // Find the assoicated cluster information. std::pair<bool, SmallVector<BBClusterInfo, 4>> P = @@ -182,13 +182,8 @@ bool getBBClusterInfoForFunction( return true; } - V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P.second) { - // Bail out if the cluster information contains invalid MBB numbers. - if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) - return false; - V[bbClusterInfo.MBBNumber] = bbClusterInfo; - } + for (const BBClusterInfo &BBCI : P.second) + V[BBCI.BBID] = BBCI; return true; } @@ -199,16 +194,17 @@ bool getBBClusterInfoForFunction( // clusters, they are moved into a single "Exception" section. Eventually, // clusters are ordered in increasing order of their IDs, with the "Exception" // and "Cold" succeeding all other clusters. -// FuncBBClusterInfo represent the cluster information for basic blocks. If this -// is empty, it means unique sections for all basic blocks in the function. +// FuncBBClusterInfo represent the cluster information for basic blocks. It +// maps from BBID of basic blocks to their cluster information. If this is +// empty, it means unique sections for all basic blocks in the function. 
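assignSections in the hunk below resolves each block's BBID against this map and sends unlisted blocks to the cold section. A stand-alone sketch of that lookup, with made-up section identifiers:

    #include <unordered_map>

    struct ClusterInfo { unsigned ClusterID; unsigned PositionInCluster; };
    constexpr unsigned ColdSectionID = ~0u; // stand-in for MBBSectionID::ColdSectionID

    // With no cluster info at all, every block keeps a unique section (its own
    // number); otherwise blocks missing from the profile go to the cold section.
    static unsigned
    sectionForBlock(unsigned BBID, unsigned BBNumber,
                    const std::unordered_map<unsigned, ClusterInfo> &Info) {
      if (Info.empty())
        return BBNumber;
      auto It = Info.find(BBID);
      return It != Info.end() ? It->second.ClusterID : ColdSectionID;
    }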
static void assignSections(MachineFunction &MF, - const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) { + const DenseMap<unsigned, BBClusterInfo> &FuncBBClusterInfo) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). If more than one cluster contain eh_pads, we // set it equal to ExceptionSectionID. - Optional<MBBSectionID> EHPadsSectionID; + std::optional<MBBSectionID> EHPadsSectionID; for (auto &MBB : MF) { // With the 'all' option, every basic block is placed in a unique section. @@ -218,15 +214,21 @@ assignSections(MachineFunction &MF, if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || FuncBBClusterInfo.empty()) { // If unique sections are desired for all basic blocks of the function, we - // set every basic block's section ID equal to its number (basic block - // id). This further ensures that basic blocks are ordered canonically. - MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())}); - } else if (FuncBBClusterInfo[MBB.getNumber()]) - MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID); - else { - // BB goes into the special cold section if it is not specified in the - // cluster info map. - MBB.setSectionID(MBBSectionID::ColdSectionID); + // set every basic block's section ID equal to its original position in + // the layout (which is equal to its number). This ensures that basic + // blocks are ordered canonically. + MBB.setSectionID(MBB.getNumber()); + } else { + // TODO: Replace `getBBIDOrNumber` with `getBBID` once version 1 is + // deprecated. + auto I = FuncBBClusterInfo.find(MBB.getBBIDOrNumber()); + if (I != FuncBBClusterInfo.end()) { + MBB.setSectionID(I->second.ClusterID); + } else { + // BB goes into the special cold section if it is not specified in the + // cluster info map. + MBB.setSectionID(MBBSectionID::ColdSectionID); + } } if (MBB.isEHPad() && EHPadsSectionID != MBB.getSectionID() && @@ -249,12 +251,14 @@ assignSections(MachineFunction &MF, void llvm::sortBasicBlocksAndUpdateBranches( MachineFunction &MF, MachineBasicBlockComparator MBBCmp) { - SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs( - MF.getNumBlockIDs()); + [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front(); + SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs()); for (auto &MBB : MF) PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); MF.sort(MBBCmp); + assert(&MF.front() == EntryBlock && + "Entry block should not be displaced by basic block sections"); // Set IsBeginSection and IsEndSection according to the assigned section IDs. MF.assignBeginEndSections(); @@ -317,11 +321,14 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) return true; - - // Renumber blocks before sorting them for basic block sections. This is - // useful during sorting, basic blocks in the same section will retain the - // default order. This renumbering should also be done for basic block - // labels to match the profiles with the correct blocks. + // Renumber blocks before sorting them. This is useful during sorting, + // basic blocks in the same section will retain the default order. + // This renumbering should also be done for basic block labels to match the + // profiles with the correct blocks. 
+ // For LLVM_BB_ADDR_MAP versions 2 and higher, this renumbering serves + // the different purpose of accessing the original layout positions and + // finding the original fallthroughs. + // TODO: Change the above comment accordingly when version 1 is deprecated. MF.RenumberBlocks(); if (BBSectionsType == BasicBlockSection::Labels) { @@ -331,7 +338,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>(); - std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo; + // Map from BBID of blocks to their cluster information. + DenseMap<unsigned, BBClusterInfo> FuncBBClusterInfo; if (BBSectionsType == BasicBlockSection::List && !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, FuncBBClusterInfo)) @@ -371,8 +379,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // If the two basic block are in the same section, the order is decided by // their position within the section. if (XSectionID.Type == MBBSectionID::SectionType::Default) - return FuncBBClusterInfo[X.getNumber()]->PositionInCluster < - FuncBBClusterInfo[Y.getNumber()]->PositionInCluster; + return FuncBBClusterInfo.lookup(X.getBBIDOrNumber()).PositionInCluster < + FuncBBClusterInfo.lookup(Y.getBBIDOrNumber()).PositionInCluster; return X.getNumber() < Y.getNumber(); }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index c2acf115998b..5bc8d82debc3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,23 +93,23 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf, if (FI == ProgramBBClusterInfo.end()) return invalidProfileError( "Cluster list does not follow a function name specifier."); - SmallVector<StringRef, 4> BBIndexes; - S.split(BBIndexes, ' '); + SmallVector<StringRef, 4> BBIDs; + S.split(BBIDs, ' '); // Reset current cluster position. CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + for (auto BBIDStr : BBIDs) { + unsigned long long BBID; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) + BBIDStr + "'."); + if (!FuncBBIDs.insert(BBID).second) return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) + BBIDStr + "'."); + if (BBID == 0 && CurrentPosition) return invalidProfileError("Entry BB (0) does not begin a cluster."); - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + FI->second.emplace_back( + BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); } CurrentCluster++; } else { // This is a function name specifier. 
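A minimal sketch of the cluster-line parsing performed by the profile-reader hunk above: a whitespace-separated list of basic block IDs is converted with getAsUnsignedInteger and duplicate IDs are rejected. The function and variable names here are illustrative, not part of the reader:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/ADT/StringRef.h"
  #include <set>

  // Parses a "0 1 3 2"-style cluster line into BB IDs; returns false on a
  // non-numeric token or a duplicate ID, mirroring the checks above.
  static bool parseClusterLine(llvm::StringRef Line,
                               llvm::SmallVectorImpl<unsigned> &Out) {
    llvm::SmallVector<llvm::StringRef, 4> Tokens;
    Line.split(Tokens, ' ');
    std::set<unsigned> Seen;
    for (llvm::StringRef Tok : Tokens) {
      unsigned long long ID;
      if (llvm::getAsUnsignedInteger(Tok, 10, ID))
        return false; // Not an unsigned integer.
      if (!Seen.insert((unsigned)ID).second)
        return false; // Duplicate basic block ID.
      Out.push_back((unsigned)ID);
    }
    return true;
  }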
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 07be03d2dab9..d491691135dc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -1875,7 +1875,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp index 29508f8f35a6..016c81dc5aa4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -23,6 +23,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -87,7 +88,9 @@ class BranchRelaxation : public MachineFunctionPass { bool relaxBranchInstructions(); void scanFunction(); - MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB); + MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB); + MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB, + const BasicBlock *BB); MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI, MachineBasicBlock *DestBB); @@ -201,12 +204,20 @@ void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { } } -/// Insert a new empty basic block and insert it after \BB -MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) { +/// Insert a new empty MachineBasicBlock and insert it after \p OrigMBB +MachineBasicBlock * +BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigBB) { + return createNewBlockAfter(OrigBB, OrigBB.getBasicBlock()); +} + +/// Insert a new empty MachineBasicBlock with \p BB as its BasicBlock +/// and insert it after \p OrigMBB +MachineBasicBlock * +BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB, + const BasicBlock *BB) { // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(BB.getBasicBlock()); - MF->insert(++BB.getIterator(), NewBB); + MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++OrigMBB.getIterator(), NewBB); // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); @@ -431,7 +442,7 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *MBB = MI.getParent(); - + SmallVector<MachineOperand, 4> Cond; unsigned OldBrSize = TII->getInstSizeInBytes(MI); MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); @@ -466,7 +477,8 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { // Create the optional restore block and, initially, place it at the end of // function. That block will be placed later if it's used; otherwise, it will // be erased. 
- MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back()); + MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(), + DestBB->getBasicBlock()); TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL, DestOffset - SrcOffset, RS.get()); @@ -482,10 +494,11 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { // restore blocks are just duplicated for each far branch. assert(!DestBB->isEntryBlock()); MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); - if (auto *FT = PrevBB->getFallThrough()) { + // Fall through only if PrevBB has no unconditional branch as one of its + // terminators. + if (auto *FT = PrevBB->getLogicalFallThrough()) { assert(FT == DestBB); TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); - // Recalculate the block size. BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); } // Now, RestoreBB could be placed directly before DestBB. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp index 57170c58db14..310273173647 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -135,6 +135,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, // Get the undef operand's register class const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); + assert(OpRC && "Not a valid register class"); // If the instruction has a true dependency, we can hide the false depdency // behind it. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp index 42523c47a671..25741686a829 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -18,7 +18,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" @@ -89,10 +88,10 @@ class CFIInstrInserter : public MachineFunctionPass { #define INVALID_OFFSET INT_MAX /// contains the location where CSR register is saved. struct CSRSavedLocation { - CSRSavedLocation(Optional<unsigned> R, Optional<int> O) + CSRSavedLocation(std::optional<unsigned> R, std::optional<int> O) : Reg(R), Offset(O) {} - Optional<unsigned> Reg; - Optional<int> Offset; + std::optional<unsigned> Reg; + std::optional<int> Offset; }; /// Contains cfa offset and register values valid at entry and exit of basic @@ -148,7 +147,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF); // Initial CFA register value i.e. the one valid at the beginning of the // function. - unsigned InitialRegister = + Register InitialRegister = MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); unsigned NumRegs = TRI.getNumRegs(); @@ -187,8 +186,8 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { // Determine cfa offset and register set by the block. 
for (MachineInstr &MI : *MBBInfo.MBB) { if (MI.isCFIInstruction()) { - Optional<unsigned> CSRReg; - Optional<int> CSROffset; + std::optional<unsigned> CSRReg; + std::optional<int> CSROffset; unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; switch (CFI.getOperation()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp index 519b24c21d7a..615687abad81 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -61,7 +61,7 @@ Register VirtRegAuxInfo::copyHint(const MachineInstr *MI, unsigned Reg, if (!HReg) return 0; - if (Register::isVirtualRegister(HReg)) + if (HReg.isVirtual()) return Sub == HSub ? HReg : Register(); const TargetRegisterClass *RC = MRI.getRegClass(Reg); @@ -107,7 +107,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI, // If the original (pre-splitting) registers match this // copy came from a split. - if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original) + if (!Reg.isVirtual() || VRM.getOriginal(Reg) != Original) return false; // Follow the copy live-in value. @@ -278,7 +278,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, if (TargetHint.first == 0 && TargetHint.second) MRI.clearSimpleHint(LI.reg()); - std::set<Register> HintedRegs; + SmallSet<Register, 4> HintedRegs; for (const auto &Hint : CopyHints) { if (!HintedRegs.insert(Hint.Reg).second || (TargetHint.first != 0 && Hint.Reg == TargetHint.second)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp index f74ff30ab2e1..ce1ef571c9df 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp @@ -231,7 +231,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, // when i64 and f64 are both passed in GPRs. StackOffset = SavedStackOffset; MaxStackArgAlign = SavedMaxStackArgAlign; - Locs.resize(NumLocs); + Locs.truncate(NumLocs); } void CCState::analyzeMustTailForwardedRegisters( @@ -240,8 +240,8 @@ void CCState::analyzeMustTailForwardedRegisters( // Oftentimes calling conventions will not user register parameters for // variadic functions, so we need to assume we're not variadic so that we get // all the registers that might be used in a non-variadic call. 
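The SaveAndRestore change just below drops the explicit template argument and relies on class template argument deduction; a minimal sketch of the same idiom in isolation (Flag and sketch are made-up names):

  #include "llvm/Support/SaveAndRestore.h"

  void sketch() {
    bool Flag = true;
    {
      // Deduces SaveAndRestore<bool>; Flag is false only inside this scope.
      llvm::SaveAndRestore Saved(Flag, false);
    }
    // Flag has been restored to true here.
  }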
- SaveAndRestore<bool> SavedVarArg(IsVarArg, false); - SaveAndRestore<bool> SavedMustTail(AnalyzingMustTailForwardedRegs, true); + SaveAndRestore SavedVarArg(IsVarArg, false); + SaveAndRestore SavedMustTail(AnalyzingMustTailForwardedRegs, true); for (MVT RegVT : RegParmTypes) { SmallVector<MCPhysReg, 8> RemainingRegs; @@ -270,19 +270,20 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC, CCState CCInfo2(CallerCC, false, MF, RVLocs2, C); CCInfo2.AnalyzeCallResult(Ins, CallerFn); - if (RVLocs1.size() != RVLocs2.size()) - return false; - for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) { - const CCValAssign &Loc1 = RVLocs1[I]; - const CCValAssign &Loc2 = RVLocs2[I]; - - if ( // Must both be in registers, or both in memory - Loc1.isRegLoc() != Loc2.isRegLoc() || - // Must fill the same part of their locations - Loc1.getLocInfo() != Loc2.getLocInfo() || - // Memory offset/register number must be the same - Loc1.getExtraInfo() != Loc2.getExtraInfo()) + auto AreCompatible = [](const CCValAssign &Loc1, const CCValAssign &Loc2) { + assert(!Loc1.isPendingLoc() && !Loc2.isPendingLoc() && + "The location must have been decided by now"); + // Must fill the same part of their locations. + if (Loc1.getLocInfo() != Loc2.getLocInfo()) return false; - } - return true; + // Must both be in the same registers, or both in memory at the same offset. + if (Loc1.isRegLoc() && Loc2.isRegLoc()) + return Loc1.getLocReg() == Loc2.getLocReg(); + if (Loc1.isMemLoc() && Loc2.isMemLoc()) + return Loc1.getLocMemOffset() == Loc2.getLocMemOffset(); + llvm_unreachable("Unknown location kind"); + }; + + return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(), + RVLocs2.end(), AreCompatible); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp index 5050395fbc0f..398ff56f737c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp @@ -19,6 +19,7 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeAssignmentTrackingAnalysisPass(Registry); initializeAtomicExpandPass(Registry); initializeBasicBlockSectionsPass(Registry); initializeBranchFolderPassPass(Registry); @@ -36,6 +37,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); + initializeExpandLargeDivRemLegacyPassPass(Registry); + initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); @@ -68,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCFGPrinterPass(Registry); initializeMachineCSEPass(Registry); initializeMachineCombinerPass(Registry); initializeMachineCopyPropagationPass(Registry); @@ -75,18 +79,23 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCycleInfoWrapperPassPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineFunctionPrinterPassPass(Registry); + initializeMachineLateInstrsCleanupPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoWrapperPassPass(Registry); initializeMachineOptimizationRemarkEmitterPassPass(Registry); initializeMachineOutlinerPass(Registry); initializeMachinePipelinerPass(Registry); + initializeMachineSanitizerBinaryMetadataPass(Registry); initializeModuloScheduleTestPass(Registry); initializeMachinePostDominatorTreePass(Registry); initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); + initializeMachineUniformityAnalysisPassPass(Registry); + initializeMachineUniformityInfoPrinterPassPass(Registry); initializeMachineVerifierPassPass(Registry); + initializeObjCARCContractLegacyPassPass(Registry); initializeOptimizePHIsPass(Registry); initializePEIPass(Registry); initializePHIEliminationPass(Registry); @@ -113,6 +122,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeSjLjEHPreparePass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); + initializeStackFrameLayoutAnalysisPassPass(Registry); initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); @@ -120,7 +130,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); - initializeTypePromotionPass(Registry); + initializeTypePromotionLegacyPass(Registry); initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 8f185a161bd0..a5215969c0dd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -17,6 +17,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/DebugInfoMetadata.h" + +#define DEBUG_TYPE 
"codegen-common" using namespace llvm; @@ -100,8 +103,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // Make sure that the copy dest is not a vreg when the copy source is a // physical register. - if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && - Register::isPhysicalRegister(OPI2->getReg()))) + if (!OPI2->isReg() || + (!OPI->getReg().isPhysical() && OPI2->getReg().isPhysical())) return false; return true; @@ -197,3 +200,88 @@ unsigned llvm::getInvertedFPClassTest(unsigned Test) { } return 0; } + +static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI, + MachineInstr &Copy) { + assert(Copy.getOpcode() == TargetOpcode::COPY && "Must be a COPY"); + + return &Copy.getOperand(1); +} + +static MachineOperand *getSalvageOpsForTrunc(const MachineRegisterInfo &MRI, + MachineInstr &Trunc, + SmallVectorImpl<uint64_t> &Ops) { + assert(Trunc.getOpcode() == TargetOpcode::G_TRUNC && "Must be a G_TRUNC"); + + const auto FromLLT = MRI.getType(Trunc.getOperand(1).getReg()); + const auto ToLLT = MRI.getType(Trunc.defs().begin()->getReg()); + + // TODO: Support non-scalar types. + if (!FromLLT.isScalar()) { + return nullptr; + } + + auto ExtOps = DIExpression::getExtOps(FromLLT.getSizeInBits(), + ToLLT.getSizeInBits(), false); + Ops.append(ExtOps.begin(), ExtOps.end()); + return &Trunc.getOperand(1); +} + +static MachineOperand *salvageDebugInfoImpl(const MachineRegisterInfo &MRI, + MachineInstr &MI, + SmallVectorImpl<uint64_t> &Ops) { + switch (MI.getOpcode()) { + case TargetOpcode::G_TRUNC: + return getSalvageOpsForTrunc(MRI, MI, Ops); + case TargetOpcode::COPY: + return getSalvageOpsForCopy(MRI, MI); + default: + return nullptr; + } +} + +void llvm::salvageDebugInfoForDbgValue(const MachineRegisterInfo &MRI, + MachineInstr &MI, + ArrayRef<MachineOperand *> DbgUsers) { + // These are arbitrary chosen limits on the maximum number of values and the + // maximum size of a debug expression we can salvage up to, used for + // performance reasons. + const unsigned MaxExpressionSize = 128; + + for (auto *DefMO : DbgUsers) { + MachineInstr *DbgMI = DefMO->getParent(); + if (DbgMI->isIndirectDebugValue()) { + continue; + } + + int UseMOIdx = DbgMI->findRegisterUseOperandIdx(DefMO->getReg()); + assert(UseMOIdx != -1 && DbgMI->hasDebugOperandForReg(DefMO->getReg()) && + "Must use salvaged instruction as its location"); + + // TODO: Support DBG_VALUE_LIST. 
+ if (DbgMI->getOpcode() != TargetOpcode::DBG_VALUE) { + assert(DbgMI->getOpcode() == TargetOpcode::DBG_VALUE_LIST && + "Must be either DBG_VALUE or DBG_VALUE_LIST"); + continue; + } + + const DIExpression *SalvagedExpr = DbgMI->getDebugExpression(); + + SmallVector<uint64_t, 16> Ops; + auto Op0 = salvageDebugInfoImpl(MRI, MI, Ops); + if (!Op0) + continue; + SalvagedExpr = DIExpression::appendOpsToArg(SalvagedExpr, Ops, 0, true); + + bool IsValidSalvageExpr = + SalvagedExpr->getNumElements() <= MaxExpressionSize; + if (IsValidSalvageExpr) { + auto &UseMO = DbgMI->getOperand(UseMOIdx); + UseMO.setReg(Op0->getReg()); + UseMO.setSubReg(Op0->getSubReg()); + DbgMI->getDebugExpressionOp().setMetadata(SalvagedExpr); + + LLVM_DEBUG(dbgs() << "SALVAGE: " << *DbgMI << '\n'); + } + } +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index b8f6fc9bbcde..dd431cc6f4f5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -65,6 +65,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -97,6 +98,7 @@ #include <iterator> #include <limits> #include <memory> +#include <optional> #include <utility> #include <vector> @@ -106,8 +108,8 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "codegenprepare" STATISTIC(NumBlocksElim, "Number of blocks eliminated"); -STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); -STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps"); STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " @@ -120,35 +122,36 @@ STATISTIC(NumMemoryInstsPhiCreated, STATISTIC(NumMemoryInstsSelectCreated, "Number of select created when address " "computations were sunk to memory instructions"); -STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); -STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumAndsAdded, "Number of and mask instructions added to form ext loads"); STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); -STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); static cl::opt<bool> DisableBranchOpts( - "disable-cgp-branch-opts", cl::Hidden, cl::init(false), - cl::desc("Disable branch optimizations in CodeGenPrepare")); + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); static cl::opt<bool> DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare")); -static cl::opt<bool> DisableSelectToBranch( - "disable-cgp-select2branch", cl::Hidden, 
cl::init(false), - cl::desc("Disable select to branch conversion.")); +static cl::opt<bool> + DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, + cl::init(false), + cl::desc("Disable select to branch conversion.")); -static cl::opt<bool> AddrSinkUsingGEPs( - "addr-sink-using-gep", cl::Hidden, cl::init(true), - cl::desc("Address sinking in CGP using GEPs.")); +static cl::opt<bool> + AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), + cl::desc("Address sinking in CGP using GEPs.")); -static cl::opt<bool> EnableAndCmpSinking( - "enable-andcmp-sinking", cl::Hidden, cl::init(true), - cl::desc("Enable sinkinig and/cmp into branches.")); +static cl::opt<bool> + EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinkinig and/cmp into branches.")); static cl::opt<bool> DisableStoreExtract( "disable-cgp-store-extract", cl::Hidden, cl::init(false), @@ -204,10 +207,11 @@ static cl::opt<bool> ForceSplitStore( "force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says.")); -static cl::opt<bool> -EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, +static cl::opt<bool> EnableTypePromotionMerge( + "cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" - " the other."), cl::init(true)); + " the other."), + cl::init(true)); static cl::opt<bool> DisableComplexAddrModes( "disable-complex-addr-modes", cl::Hidden, cl::init(false), @@ -215,12 +219,12 @@ static cl::opt<bool> DisableComplexAddrModes( "in optimizeMemoryInst.")); static cl::opt<bool> -AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), - cl::desc("Allow creation of Phis in Address sinking.")); + AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), + cl::desc("Allow creation of Phis in Address sinking.")); -static cl::opt<bool> -AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), - cl::desc("Allow creation of selects in Address sinking.")); +static cl::opt<bool> AddrSinkNewSelects( + "addr-sink-new-select", cl::Hidden, cl::init(true), + cl::desc("Allow creation of selects in Address sinking.")); static cl::opt<bool> AddrSinkCombineBaseReg( "addr-sink-combine-base-reg", cl::Hidden, cl::init(true), @@ -252,200 +256,219 @@ static cl::opt<bool> cl::desc("Enable BFI update verification for " "CodeGenPrepare.")); -static cl::opt<bool> OptimizePhiTypes( - "cgp-optimize-phi-types", cl::Hidden, cl::init(false), - cl::desc("Enable converting phi types in CodeGenPrepare")); +static cl::opt<bool> + OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false), + cl::desc("Enable converting phi types in CodeGenPrepare")); + +static cl::opt<unsigned> + HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, + cl::desc("Least BB number of huge function.")); namespace { enum ExtType { - ZeroExtension, // Zero extension has been seen. - SignExtension, // Sign extension has been seen. - BothExtension // This extension type is used if we saw sext after - // ZeroExtension had been set, or if we saw zext after - // SignExtension had been set. It makes the type - // information of a promoted instruction invalid. + ZeroExtension, // Zero extension has been seen. + SignExtension, // Sign extension has been seen. + BothExtension // This extension type is used if we saw sext after + // ZeroExtension had been set, or if we saw zext after + // SignExtension had been set. 
It makes the type + // information of a promoted instruction invalid. +}; + +enum ModifyDT { + NotModifyDT, // Not Modify any DT. + ModifyBBDT, // Modify the Basic Block Dominator Tree. + ModifyInstDT // Modify the Instruction Dominator in a Basic Block, + // This usually means we move/delete/insert instruction + // in a Basic Block. So we should re-iterate instructions + // in such Basic Block. }; using SetOfInstrs = SmallPtrSet<Instruction *, 16>; using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>; using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; using SExts = SmallVector<Instruction *, 16>; -using ValueToSExts = DenseMap<Value *, SExts>; +using ValueToSExts = MapVector<Value *, SExts>; class TypePromotionTransaction; - class CodeGenPrepare : public FunctionPass { - const TargetMachine *TM = nullptr; - const TargetSubtargetInfo *SubtargetInfo; - const TargetLowering *TLI = nullptr; - const TargetRegisterInfo *TRI; - const TargetTransformInfo *TTI = nullptr; - const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; - const TargetLibraryInfo *TLInfo; - const LoopInfo *LI; - std::unique_ptr<BlockFrequencyInfo> BFI; - std::unique_ptr<BranchProbabilityInfo> BPI; - ProfileSummaryInfo *PSI; - - /// As we scan instructions optimizing them, this is the next instruction - /// to optimize. Transforms that can invalidate this should update it. - BasicBlock::iterator CurInstIterator; - - /// Keeps track of non-local addresses that have been sunk into a block. - /// This allows us to avoid inserting duplicate code for blocks with - /// multiple load/stores of the same address. The usage of WeakTrackingVH - /// enables SunkAddrs to be treated as a cache whose entries can be - /// invalidated if a sunken address computation has been erased. - ValueMap<Value*, WeakTrackingVH> SunkAddrs; - - /// Keeps track of all instructions inserted for the current function. - SetOfInstrs InsertedInsts; - - /// Keeps track of the type of the related instruction before their - /// promotion for the current function. - InstrToOrigTy PromotedInsts; - - /// Keep track of instructions removed during promotion. - SetOfInstrs RemovedInsts; - - /// Keep track of sext chains based on their initial value. - DenseMap<Value *, Instruction *> SeenChainsForSExt; - - /// Keep track of GEPs accessing the same data structures such as structs or - /// arrays that are candidates to be split later because of their large - /// size. - MapVector< - AssertingVH<Value>, - SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> - LargeOffsetGEPMap; - - /// Keep track of new GEP base after splitting the GEPs having large offset. - SmallSet<AssertingVH<Value>, 2> NewGEPBases; - - /// Map serial numbers to Large offset GEPs. - DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID; - - /// Keep track of SExt promoted. - ValueToSExts ValToSExtendedUses; - - /// True if the function has the OptSize attribute. - bool OptSize; - - /// DataLayout for the Function being processed. - const DataLayout *DL = nullptr; - - /// Building the dominator tree can be expensive, so we only build it - /// lazily and update it when required. 
- std::unique_ptr<DominatorTree> DT; +class CodeGenPrepare : public FunctionPass { + const TargetMachine *TM = nullptr; + const TargetSubtargetInfo *SubtargetInfo; + const TargetLowering *TLI = nullptr; + const TargetRegisterInfo *TRI; + const TargetTransformInfo *TTI = nullptr; + const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + const TargetLibraryInfo *TLInfo; + const LoopInfo *LI; + std::unique_ptr<BlockFrequencyInfo> BFI; + std::unique_ptr<BranchProbabilityInfo> BPI; + ProfileSummaryInfo *PSI; - public: - static char ID; // Pass identification, replacement for typeid + /// As we scan instructions optimizing them, this is the next instruction + /// to optimize. Transforms that can invalidate this should update it. + BasicBlock::iterator CurInstIterator; - CodeGenPrepare() : FunctionPass(ID) { - initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); - } + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. The usage of WeakTrackingVH + /// enables SunkAddrs to be treated as a cache whose entries can be + /// invalidated if a sunken address computation has been erased. + ValueMap<Value *, WeakTrackingVH> SunkAddrs; - bool runOnFunction(Function &F) override; + /// Keeps track of all instructions inserted for the current function. + SetOfInstrs InsertedInsts; - StringRef getPassName() const override { return "CodeGen Prepare"; } + /// Keeps track of the type of the related instruction before their + /// promotion for the current function. + InstrToOrigTy PromotedInsts; - void getAnalysisUsage(AnalysisUsage &AU) const override { - // FIXME: When we can selectively preserve passes, preserve the domtree. - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetPassConfig>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>(); - } + /// Keep track of instructions removed during promotion. + SetOfInstrs RemovedInsts; - private: - template <typename F> - void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) { - // Substituting can cause recursive simplifications, which can invalidate - // our iterator. Use a WeakTrackingVH to hold onto it in case this - // happens. - Value *CurValue = &*CurInstIterator; - WeakTrackingVH IterHandle(CurValue); + /// Keep track of sext chains based on their initial value. + DenseMap<Value *, Instruction *> SeenChainsForSExt; - f(); + /// Keep track of GEPs accessing the same data structures such as structs or + /// arrays that are candidates to be split later because of their large + /// size. + MapVector<AssertingVH<Value>, + SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> + LargeOffsetGEPMap; - // If the iterator instruction was recursively deleted, start over at the - // start of the block. - if (IterHandle != CurValue) { - CurInstIterator = BB->begin(); - SunkAddrs.clear(); - } + /// Keep track of new GEP base after splitting the GEPs having large offset. + SmallSet<AssertingVH<Value>, 2> NewGEPBases; + + /// Map serial numbers to Large offset GEPs. + DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID; + + /// Keep track of SExt promoted. + ValueToSExts ValToSExtendedUses; + + /// True if the function has the OptSize attribute. 
+ bool OptSize; + + /// DataLayout for the Function being processed. + const DataLayout *DL = nullptr; + + /// Building the dominator tree can be expensive, so we only build it + /// lazily and update it when required. + std::unique_ptr<DominatorTree> DT; + +public: + /// If encounter huge function, we need to limit the build time. + bool IsHugeFunc = false; + + /// FreshBBs is like worklist, it collected the updated BBs which need + /// to be optimized again. + /// Note: Consider building time in this pass, when a BB updated, we need + /// to insert such BB into FreshBBs for huge function. + SmallSet<BasicBlock *, 32> FreshBBs; + + static char ID; // Pass identification, replacement for typeid + + CodeGenPrepare() : FunctionPass(ID) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return "CodeGen Prepare"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>(); + } + +private: + template <typename F> + void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) { + // Substituting can cause recursive simplifications, which can invalidate + // our iterator. Use a WeakTrackingVH to hold onto it in case this + // happens. + Value *CurValue = &*CurInstIterator; + WeakTrackingVH IterHandle(CurValue); + + f(); + + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + if (IterHandle != CurValue) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); } + } - // Get the DominatorTree, building if necessary. 
- DominatorTree &getDT(Function &F) { - if (!DT) - DT = std::make_unique<DominatorTree>(F); - return *DT; - } - - void removeAllAssertingVHReferences(Value *V); - bool eliminateAssumptions(Function &F); - bool eliminateFallThrough(Function &F); - bool eliminateMostlyEmptyBlocks(Function &F); - BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); - bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; - void eliminateMostlyEmptyBlock(BasicBlock *BB); - bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, - bool isPreheader); - bool makeBitReverse(Instruction &I); - bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); - bool optimizeInst(Instruction *I, bool &ModifiedDT); - bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - Type *AccessTy, unsigned AddrSpace); - bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); - bool optimizeInlineAsmInst(CallInst *CS); - bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); - bool optimizeExt(Instruction *&I); - bool optimizeExtUses(Instruction *I); - bool optimizeLoadExt(LoadInst *Load); - bool optimizeShiftInst(BinaryOperator *BO); - bool optimizeFunnelShift(IntrinsicInst *Fsh); - bool optimizeSelectInst(SelectInst *SI); - bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); - bool optimizeSwitchType(SwitchInst *SI); - bool optimizeSwitchPhiConstants(SwitchInst *SI); - bool optimizeSwitchInst(SwitchInst *SI); - bool optimizeExtractElementInst(Instruction *Inst); - bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); - bool fixupDbgValue(Instruction *I); - bool placeDbgValues(Function &F); - bool placePseudoProbes(Function &F); - bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, - LoadInst *&LI, Instruction *&Inst, bool HasPromoted); - bool tryToPromoteExts(TypePromotionTransaction &TPT, - const SmallVectorImpl<Instruction *> &Exts, - SmallVectorImpl<Instruction *> &ProfitablyMovedExts, - unsigned CreatedInstsCost = 0); - bool mergeSExts(Function &F); - bool splitLargeGEPOffsets(); - bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited, - SmallPtrSetImpl<Instruction *> &DeletedInstrs); - bool optimizePhiTypes(Function &F); - bool performAddressTypePromotion( - Instruction *&Inst, - bool AllowPromotionWithoutCommonHeader, - bool HasPromoted, TypePromotionTransaction &TPT, - SmallVectorImpl<Instruction *> &SpeculativelyMovedExts); - bool splitBranchCondition(Function &F, bool &ModifiedDT); - bool simplifyOffsetableRelocate(GCStatepointInst &I); - - bool tryToSinkFreeOperands(Instruction *I); - bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, - Value *Arg1, CmpInst *Cmp, - Intrinsic::ID IID); - bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT); - bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT); - bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT); - void verifyBFIUpdates(Function &F); - }; + // Get the DominatorTree, building if necessary. 
+ DominatorTree &getDT(Function &F) { + if (!DT) + DT = std::make_unique<DominatorTree>(F); + return *DT; + } + + void removeAllAssertingVHReferences(Value *V); + bool eliminateAssumptions(Function &F); + bool eliminateFallThrough(Function &F); + bool eliminateMostlyEmptyBlocks(Function &F); + BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); + bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, + bool isPreheader); + bool makeBitReverse(Instruction &I); + bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT); + bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT); + bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, + unsigned AddrSpace); + bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); + bool optimizeInlineAsmInst(CallInst *CS); + bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT); + bool optimizeExt(Instruction *&I); + bool optimizeExtUses(Instruction *I); + bool optimizeLoadExt(LoadInst *Load); + bool optimizeShiftInst(BinaryOperator *BO); + bool optimizeFunnelShift(IntrinsicInst *Fsh); + bool optimizeSelectInst(SelectInst *SI); + bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); + bool optimizeSwitchType(SwitchInst *SI); + bool optimizeSwitchPhiConstants(SwitchInst *SI); + bool optimizeSwitchInst(SwitchInst *SI); + bool optimizeExtractElementInst(Instruction *Inst); + bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT); + bool fixupDbgValue(Instruction *I); + bool placeDbgValues(Function &F); + bool placePseudoProbes(Function &F); + bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, + LoadInst *&LI, Instruction *&Inst, bool HasPromoted); + bool tryToPromoteExts(TypePromotionTransaction &TPT, + const SmallVectorImpl<Instruction *> &Exts, + SmallVectorImpl<Instruction *> &ProfitablyMovedExts, + unsigned CreatedInstsCost = 0); + bool mergeSExts(Function &F); + bool splitLargeGEPOffsets(); + bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited, + SmallPtrSetImpl<Instruction *> &DeletedInstrs); + bool optimizePhiTypes(Function &F); + bool performAddressTypePromotion( + Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, + bool HasPromoted, TypePromotionTransaction &TPT, + SmallVectorImpl<Instruction *> &SpeculativelyMovedExts); + bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT); + bool simplifyOffsetableRelocate(GCStatepointInst &I); + + bool tryToSinkFreeOperands(Instruction *I); + bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1, + CmpInst *Cmp, Intrinsic::ID IID); + bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT); + bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); + bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); + void verifyBFIUpdates(Function &F); +}; } // end anonymous namespace @@ -459,8 +482,8 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, - "Optimize for code generation", false, false) +INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", + false, false) FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } @@ -474,6 +497,7 @@ bool 
CodeGenPrepare::runOnFunction(Function &F) { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); + FreshBBs.clear(); TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); SubtargetInfo = TM->getSubtargetImpl(F); @@ -488,7 +512,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BBSectionsProfileReader = getAnalysisIfAvailable<BasicBlockSectionsProfileReader>(); OptSize = F.hasOptSize(); - // Use the basic-block-sections profile to promote hot functions to .text.hot if requested. + // Use the basic-block-sections profile to promote hot functions to .text.hot + // if requested. if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && BBSectionsProfileReader->isFunctionHot(F.getName())) { F.setSectionPrefix("hot"); @@ -515,11 +540,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); - BasicBlock* BB = &*F.begin(); + BasicBlock *BB = &*F.begin(); while (BB != nullptr) { // bypassSlowDivision may create new BBs, but we don't want to reapply the // optimization to those blocks. - BasicBlock* Next = BB->getNextNode(); + BasicBlock *Next = BB->getNextNode(); // F.hasOptSize is already checked in the outer if statement. if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) EverMadeChange |= bypassSlowDivision(BB, BypassWidths); @@ -536,7 +561,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // unconditional branch. EverMadeChange |= eliminateMostlyEmptyBlocks(F); - bool ModifiedDT = false; + ModifyDT ModifiedDT = ModifyDT::NotModifyDT; if (!DisableBranchOpts) EverMadeChange |= splitBranchCondition(F, ModifiedDT); @@ -545,18 +570,51 @@ bool CodeGenPrepare::runOnFunction(Function &F) { EverMadeChange |= SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); + // If we are optimzing huge function, we need to consider the build time. + // Because the basic algorithm's complex is near O(N!). + IsHugeFunc = F.size() > HugeFuncThresholdInCGPP; + bool MadeChange = true; + bool FuncIterated = false; while (MadeChange) { MadeChange = false; DT.reset(); + for (BasicBlock &BB : llvm::make_early_inc_range(F)) { - bool ModifiedDTOnIteration = false; - MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration); + if (FuncIterated && !FreshBBs.contains(&BB)) + continue; - // Restart BB iteration if the dominator tree of the Function was changed - if (ModifiedDTOnIteration) - break; + ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT; + bool Changed = optimizeBlock(BB, ModifiedDTOnIteration); + + MadeChange |= Changed; + if (IsHugeFunc) { + // If the BB is updated, it may still has chance to be optimized. + // This usually happen at sink optimization. + // For example: + // + // bb0: + // %and = and i32 %a, 4 + // %cmp = icmp eq i32 %and, 0 + // + // If the %cmp sink to other BB, the %and will has chance to sink. + if (Changed) + FreshBBs.insert(&BB); + else if (FuncIterated) + FreshBBs.erase(&BB); + + if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT) + DT.reset(); + } else { + // For small/normal functions, we restart BB iteration if the dominator + // tree of the Function was changed. + if (ModifiedDTOnIteration != ModifyDT::NotModifyDT) + break; + } } + // We have iterated all the BB in the (only work for huge) function. 
+ FuncIterated = IsHugeFunc; + if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) MadeChange |= mergeSExts(F); if (!LargeOffsetGEPMap.empty()) @@ -586,11 +644,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Use a set vector to get deterministic iteration order. The order the // blocks are removed may affect whether or not PHI nodes in successors // are removed. - SmallSetVector<BasicBlock*, 8> WorkList; + SmallSetVector<BasicBlock *, 8> WorkList; for (BasicBlock &BB : F) { SmallVector<BasicBlock *, 2> Successors(successors(&BB)); MadeChange |= ConstantFoldTerminator(&BB, true); - if (!MadeChange) continue; + if (!MadeChange) + continue; for (BasicBlock *Succ : Successors) if (pred_empty(Succ)) @@ -601,7 +660,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= !WorkList.empty(); while (!WorkList.empty()) { BasicBlock *BB = WorkList.pop_back_val(); - SmallVector<BasicBlock*, 2> Successors(successors(BB)); + SmallVector<BasicBlock *, 2> Successors(successors(BB)); DeleteDeadBlock(BB); @@ -715,7 +774,8 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { BasicBlock *SinglePred = BB->getSinglePredecessor(); // Don't merge if BB's address is taken. - if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) + continue; BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); if (Term && !Term->isConditional()) { @@ -725,6 +785,12 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { // Merge BB into SinglePred and delete it. MergeBlockIntoPredecessor(BB); Preds.insert(SinglePred); + + if (IsHugeFunc) { + // Update FreshBBs to optimize the merged BB. + FreshBBs.insert(SinglePred); + FreshBBs.erase(BB); + } } } @@ -837,9 +903,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, // such empty block (BB), ISel will place COPY instructions in BB, not in the // predecessor of BB. BasicBlock *Pred = BB->getUniquePredecessor(); - if (!Pred || - !(isa<SwitchInst>(Pred->getTerminator()) || - isa<IndirectBrInst>(Pred->getTerminator()))) + if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) || + isa<IndirectBrInst>(Pred->getTerminator()))) return true; if (BB->getTerminator() != BB->getFirstNonPHIOrDbg()) @@ -924,10 +989,11 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, // and DestBB may have conflicting incoming values for the block. If so, we // can't merge the block. const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); - if (!DestBBPN) return true; // no conflict. + if (!DestBBPN) + return true; // no conflict. // Collect the preds of BB. - SmallPtrSet<const BasicBlock*, 16> BBPreds; + SmallPtrSet<const BasicBlock *, 16> BBPreds; if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { // It is faster to get preds from a PHI than with pred_iterator. for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) @@ -939,7 +1005,7 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, // Walk the preds of DestBB. for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = DestBBPN->getIncomingBlock(i); - if (BBPreds.count(Pred)) { // Common predecessor? + if (BBPreds.count(Pred)) { // Common predecessor? 
for (const PHINode &PN : DestBB->phis()) { const Value *V1 = PN.getIncomingValueForBlock(Pred); const Value *V2 = PN.getIncomingValueForBlock(BB); @@ -950,7 +1016,8 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, V2 = V2PN->getIncomingValueForBlock(Pred); // If there is a conflict, bail out. - if (V1 != V2) return false; + if (V1 != V2) + return false; } } } @@ -958,6 +1025,22 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, return true; } +/// Replace all old uses with new ones, and push the updated BBs into FreshBBs. +static void replaceAllUsesWith(Value *Old, Value *New, + SmallSet<BasicBlock *, 32> &FreshBBs, + bool IsHuge) { + auto *OldI = dyn_cast<Instruction>(Old); + if (OldI) { + for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (IsHuge) + FreshBBs.insert(User->getParent()); + } + } + Old->replaceAllUsesWith(New); +} + /// Eliminate a basic block that has only phi's and an unconditional branch in /// it. void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { @@ -978,6 +1061,12 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // Note: BB(=SinglePred) will not be deleted on this path. // DestBB(=its single successor) is the one that was deleted. LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n"); + + if (IsHugeFunc) { + // Update FreshBBs to optimize the merged BB. + FreshBBs.insert(SinglePred); + FreshBBs.erase(DestBB); + } return; } } @@ -1129,31 +1218,34 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, // cases like this: // bb1: // ... - // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) - // br label %merge + // %g1 = call coldcc i8 addrspace(1)* + // @llvm.experimental.gc.relocate.p1i8(...) br label %merge // // bb2: // ... - // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) - // br label %merge + // %g2 = call coldcc i8 addrspace(1)* + // @llvm.experimental.gc.relocate.p1i8(...) br label %merge // // merge: // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* // - // In this case, we can not find the bitcast any more. So we insert a new bitcast - // no matter there is already one or not. In this way, we can handle all cases, and - // the extra bitcast should be optimized away in later passes. + // In this case, we can not find the bitcast any more. So we insert a new + // bitcast no matter there is already one or not. In this way, we can handle + // all cases, and the extra bitcast should be optimized away in later + // passes. Value *ActualRelocatedBase = RelocatedBase; if (RelocatedBase->getType() != Base->getType()) { ActualRelocatedBase = Builder.CreateBitCast(RelocatedBase, Base->getType()); } - Value *Replacement = Builder.CreateGEP( - Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); + Value *Replacement = + Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase, + ArrayRef(OffsetV)); Replacement->takeName(ToReplace); - // If the newly generated derived pointer's type does not match the original derived - // pointer's type, cast the new derived pointer to match it. Same reasoning as above. + // If the newly generated derived pointer's type does not match the original + // derived pointer's type, cast the new derived pointer to match it. Same + // reasoning as above. 
Value *ActualReplacement = Replacement; if (Replacement->getType() != ToReplace->getType()) { ActualReplacement = @@ -1216,11 +1308,11 @@ static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCasts - Only insert a cast in each block once. - DenseMap<BasicBlock*, CastInst*> InsertedCasts; + DenseMap<BasicBlock *, CastInst *> InsertedCasts; bool MadeChange = false; for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); - UI != E; ) { + UI != E;) { Use &TheUse = UI.getUse(); Instruction *User = cast<Instruction>(*UI); @@ -1246,7 +1338,8 @@ static bool SinkCast(CastInst *CI) { continue; // If this user is in the same block as the cast, don't change the cast. - if (UserBB == DefBB) continue; + if (UserBB == DefBB) + continue; // If we have already inserted a cast into this block, use it. CastInst *&InsertedCast = InsertedCasts[UserBB]; @@ -1300,7 +1393,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, // If this is an extension, it will be a zero or sign extension, which // isn't a noop. - if (SrcVT.bitsLT(DstVT)) return false; + if (SrcVT.bitsLT(DstVT)) + return false; // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they @@ -1322,7 +1416,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, // Match a simple increment by constant operation. Note that if a sub is // matched, the step is negated (as if the step had been canonicalized to // an add, even though we leave the instruction alone.) -bool matchIncrement(const Instruction* IVInc, Instruction *&LHS, +bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step) { if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) || match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>( @@ -1339,21 +1433,21 @@ bool matchIncrement(const Instruction* IVInc, Instruction *&LHS, /// If given \p PN is an inductive variable with value IVInc coming from the /// backedge, and on each iteration it gets increased by Step, return pair -/// <IVInc, Step>. Otherwise, return None. -static Optional<std::pair<Instruction *, Constant *> > +/// <IVInc, Step>. Otherwise, return std::nullopt. 
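A minimal sketch of how callers unwrap the std::optional returned by the function declared below; a std::nullopt result means the PHI is not a recognized induction variable, and PN and LI are assumed to be in scope as at the existing call sites:

  if (auto IVInc = getIVIncrement(PN, LI)) {
    Instruction *IncInst = IVInc->first; // the increment instruction
    Constant *Step = IVInc->second;      // the (possibly negated) step
    // ... use the pair ...
  }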
+static std::optional<std::pair<Instruction *, Constant *>> getIVIncrement(const PHINode *PN, const LoopInfo *LI) { const Loop *L = LI->getLoopFor(PN->getParent()); if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) - return None; + return std::nullopt; auto *IVInc = dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch())); if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L) - return None; + return std::nullopt; Instruction *LHS = nullptr; Constant *Step = nullptr; if (matchIncrement(IVInc, LHS, Step) && LHS == PN) return std::make_pair(IVInc, Step); - return None; + return std::nullopt; } static bool isIVIncrement(const Value *V, const LoopInfo *LI) { @@ -1440,12 +1534,12 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); if (BO->getOpcode() != Instruction::Xor) { Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); - BO->replaceAllUsesWith(Math); + replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc); } else assert(BO->hasOneUse() && "Patterns with XOr should use the BO only in the compare"); Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); - Cmp->replaceAllUsesWith(OV); + replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc); Cmp->eraseFromParent(); BO->eraseFromParent(); return true; @@ -1484,7 +1578,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, /// Try to combine the compare into a call to the llvm.uadd.with.overflow /// intrinsic. Return true if any changes were made. bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, - bool &ModifiedDT) { + ModifyDT &ModifiedDT) { Value *A, *B; BinaryOperator *Add; if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) { @@ -1511,12 +1605,12 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, return false; // Reset callers - do not crash by iterating over a dead instruction. - ModifiedDT = true; + ModifiedDT = ModifyDT::ModifyInstDT; return true; } bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, - bool &ModifiedDT) { + ModifyDT &ModifiedDT) { // We are not expecting non-canonical/degenerate code. Just bail out. Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); if (isa<Constant>(A) && isa<Constant>(B)) @@ -1574,7 +1668,7 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, return false; // Reset callers - do not crash by iterating over a dead instruction. - ModifiedDT = true; + ModifiedDT = ModifyDT::ModifyInstDT; return true; } @@ -1593,11 +1687,11 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { return false; // Only insert a cmp in each block once. - DenseMap<BasicBlock*, CmpInst*> InsertedCmps; + DenseMap<BasicBlock *, CmpInst *> InsertedCmps; bool MadeChange = false; for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end(); - UI != E; ) { + UI != E;) { Use &TheUse = UI.getUse(); Instruction *User = cast<Instruction>(*UI); @@ -1613,7 +1707,8 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { BasicBlock *DefBB = Cmp->getParent(); // If this user is in the same block as the cmp, don't change the cmp. - if (UserBB == DefBB) continue; + if (UserBB == DefBB) + continue; // If we have already inserted a cmp into this block, use it. 
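Earlier in this file, the rewritten replaceMathCmpWithIntrinsic materializes the math/overflow pair with IRBuilder before routing replacements through the FreshBBs-aware replaceAllUsesWith; a minimal standalone sketch of that intrinsic pattern (buildUAddOverflow is a made-up wrapper):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include <utility>

  // Builds { result, overflow } via llvm.uadd.with.overflow and splits it into
  // the two scalar values, as replaceMathCmpWithIntrinsic does above.
  static std::pair<llvm::Value *, llvm::Value *>
  buildUAddOverflow(llvm::IRBuilder<> &Builder, llvm::Value *A, llvm::Value *B) {
    llvm::Value *MathOV =
        Builder.CreateBinaryIntrinsic(llvm::Intrinsic::uadd_with_overflow, A, B);
    llvm::Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
    llvm::Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
    return {Math, OV};
  }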
CmpInst *&InsertedCmp = InsertedCmps[UserBB]; @@ -1621,10 +1716,9 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { if (!InsertedCmp) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); assert(InsertPt != UserBB->end()); - InsertedCmp = - CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), - Cmp->getOperand(0), Cmp->getOperand(1), "", - &*InsertPt); + InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), + Cmp->getOperand(0), Cmp->getOperand(1), "", + &*InsertPt); // Propagate the debug info. InsertedCmp->setDebugLoc(Cmp->getDebugLoc()); } @@ -1731,7 +1825,7 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp, return true; } -bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { +bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (sinkCmpExpression(Cmp, *TLI)) return true; @@ -1752,14 +1846,13 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { /// this operation can be combined. /// /// Return true if any changes are made. -static bool sinkAndCmp0Expression(Instruction *AndI, - const TargetLowering &TLI, +static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts) { // Double-check that we're not trying to optimize an instruction that was // already optimized by some other part of this pass. assert(!InsertedInsts.count(AndI) && "Attempting to optimize already optimized and instruction"); - (void) InsertedInsts; + (void)InsertedInsts; // Nothing to do for single use in same basic block. if (AndI->hasOneUse() && @@ -1795,7 +1888,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI, // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any // others, so we don't need to keep track of which BBs we insert into. for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); - UI != E; ) { + UI != E;) { Use &TheUse = UI.getUse(); Instruction *User = cast<Instruction>(*UI); @@ -1976,11 +2069,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // not have i16 compare. // cmp i16 trunc.result, opnd2 // - if (isa<TruncInst>(User) && shiftIsLegal + if (isa<TruncInst>(User) && + shiftIsLegal // If the type of the truncate is legal, no truncate will be // introduced in other basic blocks. - && - (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) + && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) MadeChange = SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); @@ -2037,20 +2130,21 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, /// If the transform is performed, return true and set ModifiedDT to true. static bool despeculateCountZeros(IntrinsicInst *CountZeros, const TargetLowering *TLI, - const DataLayout *DL, - bool &ModifiedDT) { + const DataLayout *DL, ModifyDT &ModifiedDT, + SmallSet<BasicBlock *, 32> &FreshBBs, + bool IsHugeFunc) { // If a zero input is undefined, it doesn't make sense to despeculate that. if (match(CountZeros->getOperand(1), m_One())) return false; // If it's cheap to speculate, there's nothing to do. 
+ Type *Ty = CountZeros->getType(); auto IntrinsicID = CountZeros->getIntrinsicID(); - if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || - (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) + if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) || + (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty))) return false; // Only handle legal scalar cases. Anything else requires too much work. - Type *Ty = CountZeros->getType(); unsigned SizeInBits = Ty->getScalarSizeInBits(); if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) return false; @@ -2063,12 +2157,16 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // The intrinsic will be sunk behind a compare against zero and branch. BasicBlock *StartBlock = CountZeros->getParent(); BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); + if (IsHugeFunc) + FreshBBs.insert(CallBlock); // Create another block after the count zero intrinsic. A PHI will be added // in this block to select the result of the intrinsic or the bit-width // constant if the input to the intrinsic is zero. BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); + if (IsHugeFunc) + FreshBBs.insert(EndBlock); // Set up a builder to create a compare, conditional branch, and PHI. IRBuilder<> Builder(CountZeros->getContext()); @@ -2089,7 +2187,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // or the bit width of the operand. Builder.SetInsertPoint(&EndBlock->front()); PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); - CountZeros->replaceAllUsesWith(PN); + replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc); Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); PN->addIncoming(BitWidth, StartBlock); PN->addIncoming(CountZeros, CallBlock); @@ -2098,11 +2196,11 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // undefined zero argument to 'true'. This will also prevent reprocessing the // intrinsic; we only despeculate when a zero input is defined. CountZeros->setArgOperand(1, Builder.getTrue()); - ModifiedDT = true; + ModifiedDT = ModifyDT::ModifyBBDT; return true; } -bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. 
@@ -2152,23 +2250,22 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { GlobalVariable *GV; if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && GV->getPointerAlignment(*DL) < PrefAlign && - DL->getTypeAllocSize(GV->getValueType()) >= - MinSize + Offset2) + DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) GV->setAlignment(PrefAlign); } - // If this is a memcpy (or similar) then we may be able to improve the - // alignment - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { - Align DestAlign = getKnownAlignment(MI->getDest(), *DL); - MaybeAlign MIDestAlign = MI->getDestAlign(); - if (!MIDestAlign || DestAlign > *MIDestAlign) - MI->setDestAlignment(DestAlign); - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { - MaybeAlign MTISrcAlign = MTI->getSourceAlign(); - Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL); - if (!MTISrcAlign || SrcAlign > *MTISrcAlign) - MTI->setSourceAlignment(SrcAlign); - } + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment. + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + Align DestAlign = getKnownAlignment(MI->getDest(), *DL); + MaybeAlign MIDestAlign = MI->getDestAlign(); + if (!MIDestAlign || DestAlign > *MIDestAlign) + MI->setDestAlignment(DestAlign); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { + MaybeAlign MTISrcAlign = MTI->getSourceAlign(); + Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL); + if (!MTISrcAlign || SrcAlign > *MTISrcAlign) + MTI->setSourceAlignment(SrcAlign); } } @@ -2176,8 +2273,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // cold block. This interacts with our handling for loads and stores to // ensure that we can fold all uses of a potential addressing computation // into their uses. TODO: generalize this to work over profiling data - if (CI->hasFnAttr(Attribute::Cold) && - !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) + if (CI->hasFnAttr(Attribute::Cold) && !OptSize && + !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) for (auto &Arg : CI->args()) { if (!Arg->getType()->isPointerTy()) continue; @@ -2188,7 +2285,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { - default: break; + default: + break; case Intrinsic::assume: llvm_unreachable("llvm.assume should have been removed already"); case Intrinsic::experimental_widenable_condition: { @@ -2228,25 +2326,27 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { Value *ArgVal = II->getArgOperand(0); auto it = LargeOffsetGEPMap.find(II); if (it != LargeOffsetGEPMap.end()) { - // Merge entries in LargeOffsetGEPMap to reflect the RAUW. - // Make sure not to have to deal with iterator invalidation - // after possibly adding ArgVal to LargeOffsetGEPMap. - auto GEPs = std::move(it->second); - LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); - LargeOffsetGEPMap.erase(II); + // Merge entries in LargeOffsetGEPMap to reflect the RAUW. + // Make sure not to have to deal with iterator invalidation + // after possibly adding ArgVal to LargeOffsetGEPMap. 
+ auto GEPs = std::move(it->second); + LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); + LargeOffsetGEPMap.erase(II); } - II->replaceAllUsesWith(ArgVal); + replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc); II->eraseFromParent(); return true; } case Intrinsic::cttz: case Intrinsic::ctlz: // If counting zeros is expensive, try to avoid it. - return despeculateCountZeros(II, TLI, DL, ModifiedDT); + return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs, + IsHugeFunc); case Intrinsic::fshl: case Intrinsic::fshr: return optimizeFunnelShift(II); + case Intrinsic::dbg_assign: case Intrinsic::dbg_value: return fixupDbgValue(II); case Intrinsic::vscale: { @@ -2255,12 +2355,13 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // to benefit from cheap constant propagation. Type *ScalableVectorTy = VectorType::get(Type::getInt8Ty(II->getContext()), 1, true); - if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) { + if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinValue() == 8) { auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo()); auto *One = ConstantInt::getSigned(II->getType(), 1); auto *CGep = ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One); - II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType())); + replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()), + FreshBBs, IsHugeFunc); II->eraseFromParent(); return true; } @@ -2284,7 +2385,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { } // From here on out we're working with named functions. - if (!CI->getCalledFunction()) return false; + if (!CI->getCalledFunction()) + return false; // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls @@ -2293,7 +2395,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { FortifiedLibCallSimplifier Simplifier(TLInfo, true); IRBuilder<> Builder(CI); if (Value *V = Simplifier.optimizeCall(CI, Builder)) { - CI->replaceAllUsesWith(V); + replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc); CI->eraseFromParent(); return true; } @@ -2331,7 +2433,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode -bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) { +bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, + ModifyDT &ModifiedDT) { + if (!BB->getTerminator()) + return false; + ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RetI) return false; @@ -2383,7 +2489,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. const Function *F = BB->getParent(); - SmallVector<BasicBlock*, 4> TailCallBBs; + SmallVector<BasicBlock *, 4> TailCallBBs; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { // Look through bitcasts. 
@@ -2397,7 +2503,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT TailCallBBs.push_back(PredBB); } } else { - SmallPtrSet<BasicBlock*, 4> VisitedBBs; + SmallPtrSet<BasicBlock *, 4> VisitedBBs; for (BasicBlock *Pred : predecessors(BB)) { if (!VisitedBBs.insert(Pred).second) continue; @@ -2425,7 +2531,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT BFI->setBlockFreq( BB, (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency()); - ModifiedDT = Changed = true; + ModifiedDT = ModifyDT::ModifyBBDT; + Changed = true; ++NumRetsDup; } @@ -2451,16 +2558,15 @@ struct ExtAddrMode : public TargetLowering::AddrMode { bool InBounds = true; enum FieldName { - NoField = 0x00, - BaseRegField = 0x01, - BaseGVField = 0x02, - BaseOffsField = 0x04, + NoField = 0x00, + BaseRegField = 0x01, + BaseGVField = 0x02, + BaseOffsField = 0x04, ScaledRegField = 0x08, - ScaleField = 0x10, + ScaleField = 0x10, MultipleFields = 0xff }; - ExtAddrMode() = default; void print(raw_ostream &OS) const; @@ -2472,8 +2578,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode { if (BaseReg && other.BaseReg && BaseReg->getType() != other.BaseReg->getType()) return MultipleFields; - if (BaseGV && other.BaseGV && - BaseGV->getType() != other.BaseGV->getType()) + if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType()) return MultipleFields; if (ScaledReg && other.ScaledReg && ScaledReg->getType() != other.ScaledReg->getType()) @@ -2498,7 +2603,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode { if (Scale && other.Scale && Scale != other.Scale) Result |= ScaleField; - if (countPopulation(Result) > 1) + if (llvm::popcount(Result) > 1) return MultipleFields; else return static_cast<FieldName>(Result); @@ -2582,27 +2687,23 @@ void ExtAddrMode::print(raw_ostream &OS) const { if (InBounds) OS << "inbounds "; if (BaseGV) { - OS << (NeedPlus ? " + " : "") - << "GV:"; + OS << (NeedPlus ? " + " : "") << "GV:"; BaseGV->printAsOperand(OS, /*PrintType=*/false); NeedPlus = true; } if (BaseOffs) { - OS << (NeedPlus ? " + " : "") - << BaseOffs; + OS << (NeedPlus ? " + " : "") << BaseOffs; NeedPlus = true; } if (BaseReg) { - OS << (NeedPlus ? " + " : "") - << "Base:"; + OS << (NeedPlus ? " + " : "") << "Base:"; BaseReg->printAsOperand(OS, /*PrintType=*/false); NeedPlus = true; } if (Scale) { - OS << (NeedPlus ? " + " : "") - << Scale << "*"; + OS << (NeedPlus ? " + " : "") << Scale << "*"; ScaledReg->printAsOperand(OS, /*PrintType=*/false); } @@ -3034,7 +3135,8 @@ private: /// The ordered list of actions made so far. 
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; - using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator; + using CommitPt = + SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator; SetOfInstrs &RemovedInsts; }; @@ -3065,24 +3167,23 @@ void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); } -Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, - Type *Ty) { +Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) { std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); Value *Val = Ptr->getBuiltValue(); Actions.push_back(std::move(Ptr)); return Val; } -Value *TypePromotionTransaction::createSExt(Instruction *Inst, - Value *Opnd, Type *Ty) { +Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd, + Type *Ty) { std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); Value *Val = Ptr->getBuiltValue(); Actions.push_back(std::move(Ptr)); return Val; } -Value *TypePromotionTransaction::createZExt(Instruction *Inst, - Value *Opnd, Type *Ty) { +Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd, + Type *Ty) { std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); Value *Val = Ptr->getBuiltValue(); Actions.push_back(std::move(Ptr)); @@ -3123,7 +3224,7 @@ namespace { /// /// This encapsulates the logic for matching the target-legal addressing modes. class AddressingModeMatcher { - SmallVectorImpl<Instruction*> &AddrModeInsts; + SmallVectorImpl<Instruction *> &AddrModeInsts; const TargetLowering &TLI; const TargetRegisterInfo &TRI; const DataLayout &DL; @@ -3165,8 +3266,8 @@ class AddressingModeMatcher { AddressingModeMatcher( SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, const TargetRegisterInfo &TRI, const LoopInfo &LI, - const std::function<const DominatorTree &()> getDTFn, - Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, + const std::function<const DominatorTree &()> getDTFn, Type *AT, + unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, @@ -3198,11 +3299,13 @@ public: bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { ExtAddrMode Result; - bool Success = AddressingModeMatcher( - AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, - BFI).matchAddr(V, 0); - (void)Success; assert(Success && "Couldn't select *anything*?"); + bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn, + AccessTy, AS, MemoryInst, Result, + InsertedInsts, PromotedInsts, TPT, + LargeOffsetGEP, OptSize, PSI, BFI) + .matchAddr(V, 0); + (void)Success; + assert(Success && "Couldn't select *anything*?"); return Result; } @@ -3223,15 +3326,15 @@ class PhiNodeSet; /// An iterator for PhiNodeSet. class PhiNodeSetIterator { - PhiNodeSet * const Set; + PhiNodeSet *const Set; size_t CurrentIndex = 0; public: /// The constructor. Start should point to either a valid element, or be equal /// to the size of the underlying SmallVector of the PhiNodeSet. 
- PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start); - PHINode * operator*() const; - PhiNodeSetIterator& operator++(); + PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start); + PHINode *operator*() const; + PhiNodeSetIterator &operator++(); bool operator==(const PhiNodeSetIterator &RHS) const; bool operator!=(const PhiNodeSetIterator &RHS) const; }; @@ -3250,7 +3353,7 @@ class PhiNodeSet { friend class PhiNodeSetIterator; using MapType = SmallDenseMap<PHINode *, size_t, 32>; - using iterator = PhiNodeSetIterator; + using iterator = PhiNodeSetIterator; /// Keeps the elements in the order of their insertion in the underlying /// vector. To achieve constant time removal, it never deletes any element. @@ -3309,14 +3412,10 @@ public: iterator end() { return PhiNodeSetIterator(this, NodeList.size()); } /// Returns the number of elements in the collection. - size_t size() const { - return NodeMap.size(); - } + size_t size() const { return NodeMap.size(); } /// \returns 1 if the given element is in the collection, and 0 if otherwise. - size_t count(PHINode *Ptr) const { - return NodeMap.count(Ptr); - } + size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); } private: /// Updates the CurrentIndex so that it will point to a valid element. @@ -3339,13 +3438,13 @@ private: PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start) : Set(Set), CurrentIndex(Start) {} -PHINode * PhiNodeSetIterator::operator*() const { +PHINode *PhiNodeSetIterator::operator*() const { assert(CurrentIndex < Set->NodeList.size() && "PhiNodeSet access out of range"); return Set->NodeList[CurrentIndex]; } -PhiNodeSetIterator& PhiNodeSetIterator::operator++() { +PhiNodeSetIterator &PhiNodeSetIterator::operator++() { assert(CurrentIndex < Set->NodeList.size() && "PhiNodeSet access out of range"); ++CurrentIndex; @@ -3374,8 +3473,7 @@ class SimplificationTracker { SmallPtrSet<SelectInst *, 32> AllSelectNodes; public: - SimplificationTracker(const SimplifyQuery &sq) - : SQ(sq) {} + SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} Value *Get(Value *V) { do { @@ -3410,12 +3508,10 @@ public: return Get(Val); } - void Put(Value *From, Value *To) { - Storage.insert({ From, To }); - } + void Put(Value *From, Value *To) { Storage.insert({From, To}); } void ReplacePhi(PHINode *From, PHINode *To) { - Value* OldReplacement = Get(From); + Value *OldReplacement = Get(From); while (OldReplacement != From) { From = To; To = dyn_cast<PHINode>(OldReplacement); @@ -3428,7 +3524,7 @@ public: From->eraseFromParent(); } - PhiNodeSet& newPhiNodes() { return AllPhiNodes; } + PhiNodeSet &newPhiNodes() { return AllPhiNodes; } void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } @@ -3483,9 +3579,7 @@ public: : SQ(_SQ), Original(OriginalValue) {} /// Get the combined AddrMode - const ExtAddrMode &getAddrMode() const { - return AddrModes[0]; - } + const ExtAddrMode &getAddrMode() const { return AddrModes[0]; } /// Add a new AddrMode if it's compatible with the AddrModes we already /// have. @@ -3506,7 +3600,7 @@ public: // can do just by comparing against the first one given that we only care // about the cumulative difference. 
ExtAddrMode::FieldName ThisDifferentField = - AddrModes[0].compare(NewAddrMode); + AddrModes[0].compare(NewAddrMode); if (DifferentField == ExtAddrMode::NoField) DifferentField = ThisDifferentField; else if (DifferentField != ThisDifferentField) @@ -3670,10 +3764,10 @@ private: SmallSetVector<PHIPair, 8> &Matcher, PhiNodeSet &PhiNodesToMatch) { SmallVector<PHIPair, 8> WorkList; - Matcher.insert({ PHI, Candidate }); + Matcher.insert({PHI, Candidate}); SmallSet<PHINode *, 8> MatchedPHIs; MatchedPHIs.insert(PHI); - WorkList.push_back({ PHI, Candidate }); + WorkList.push_back({PHI, Candidate}); SmallSet<PHIPair, 8> Visited; while (!WorkList.empty()) { auto Item = WorkList.pop_back_val(); @@ -3702,15 +3796,15 @@ private: return false; // If we already matched them then continue. - if (Matcher.count({ FirstPhi, SecondPhi })) + if (Matcher.count({FirstPhi, SecondPhi})) continue; // So the values are different and does not match. So we need them to // match. (But we register no more than one match per PHI node, so that // we won't later try to replace them twice.) if (MatchedPHIs.insert(FirstPhi).second) - Matcher.insert({ FirstPhi, SecondPhi }); + Matcher.insert({FirstPhi, SecondPhi}); // But me must check it. - WorkList.push_back({ FirstPhi, SecondPhi }); + WorkList.push_back({FirstPhi, SecondPhi}); } } return true; @@ -3900,7 +3994,8 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, // to see if ScaleReg is actually X+C. If so, we can turn this into adding // X*Scale + C*Scale to addr mode. If we found available IV increment, do not // go any further: we can reuse it and cannot eliminate it. - ConstantInt *CI = nullptr; Value *AddLHS = nullptr; + ConstantInt *CI = nullptr; + Value *AddLHS = nullptr; if (isa<Instruction>(ScaleReg) && // not a constant expr. match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) && !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) { @@ -3921,26 +4016,26 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, // If this is an add recurrence with a constant step, return the increment // instruction and the canonicalized step. - auto GetConstantStep = [this](const Value * V) - ->Optional<std::pair<Instruction *, APInt> > { + auto GetConstantStep = + [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> { auto *PN = dyn_cast<PHINode>(V); if (!PN) - return None; + return std::nullopt; auto IVInc = getIVIncrement(PN, &LI); if (!IVInc) - return None; - // TODO: The result of the intrinsics above is two-compliment. However when + return std::nullopt; + // TODO: The result of the intrinsics above is two-complement. However when // IV inc is expressed as add or sub, iv.next is potentially a poison value. // If it has nuw or nsw flags, we need to make sure that these flags are // inferrable at the point of memory instruction. Otherwise we are replacing - // well-defined two-compliment computation with poison. Currently, to avoid + // well-defined two-complement computation with poison. Currently, to avoid // potentially complex analysis needed to prove this, we reject such cases. 
if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first)) if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) - return None; + return std::nullopt; if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second)) return std::make_pair(IVInc->first, ConstantStep->getValue()); - return None; + return std::nullopt; }; // Try to account for the following special case: @@ -4043,8 +4138,7 @@ class TypePromotionHelper { /// Utility function to add a promoted instruction \p ExtOpnd to /// \p PromotedInsts and record the type of extension we have seen. static void addPromotedInst(InstrToOrigTy &PromotedInsts, - Instruction *ExtOpnd, - bool IsSExt) { + Instruction *ExtOpnd, bool IsSExt) { ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd); if (It != PromotedInsts.end()) { @@ -4066,8 +4160,7 @@ class TypePromotionHelper { /// cannot use the information we had on the original type. /// BothExtension doesn't match any extension type. static const Type *getOrigType(const InstrToOrigTy &PromotedInsts, - Instruction *Opnd, - bool IsSExt) { + Instruction *Opnd, bool IsSExt) { ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); if (It != PromotedInsts.end() && It->second.getInt() == ExtTy) @@ -4431,7 +4524,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // If yes, create a new one. LLVM_DEBUG(dbgs() << "More operands to ext\n"); Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) - : TPT.createZExt(Ext, Opnd, Ext->getType()); + : TPT.createZExt(Ext, Opnd, Ext->getType()); if (!isa<Instruction>(ValForExtOpnd)) { TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); continue; @@ -4496,7 +4589,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, bool *MovedAway) { // Avoid exponential behavior on extremely deep expression trees. - if (Depth >= 5) return false; + if (Depth >= 5) + return false; // By default, all matched instructions stay in place. if (MovedAway) @@ -4525,8 +4619,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, return matchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::AddrSpaceCast: { - unsigned SrcAS - = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned SrcAS = + AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) return matchAddr(AddrInst->getOperand(0), Depth); @@ -4544,8 +4638,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, TPT.getRestorationPoint(); AddrMode.InBounds = false; - if (matchAddr(AddrInst->getOperand(1), Depth+1) && - matchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(1), Depth + 1) && + matchAddr(AddrInst->getOperand(0), Depth + 1)) return true; // Restore the old addr mode info. @@ -4554,8 +4648,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, TPT.rollback(LastKnownGood); // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. - if (matchAddr(AddrInst->getOperand(0), Depth+1) && - matchAddr(AddrInst->getOperand(1), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth + 1) && + matchAddr(AddrInst->getOperand(1), Depth + 1)) return true; // Otherwise we definitely can't merge the ADD in. 
@@ -4564,9 +4658,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, TPT.rollback(LastKnownGood); break; } - //case Instruction::Or: - // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. - //break; + // case Instruction::Or: + // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. + // break; case Instruction::Mul: case Instruction::Shl: { // Can only handle X*C and X << C. @@ -4592,7 +4686,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = - cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); + cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); ConstantOffset += SL->getElementOffset(Idx); } else { TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType()); @@ -4600,7 +4694,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, // The optimisations below currently only work for fixed offsets. if (TS.isScalable()) return false; - int64_t TypeSize = TS.getFixedSize(); + int64_t TypeSize = TS.getFixedValue(); if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { const APInt &CVal = CI->getValue(); @@ -4627,7 +4721,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, if (ConstantOffset == 0 || TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. - if (matchAddr(AddrInst->getOperand(0), Depth+1)) { + if (matchAddr(AddrInst->getOperand(0), Depth + 1)) { if (!cast<GEPOperator>(AddrInst)->isInBounds()) AddrMode.InBounds = false; return true; @@ -4667,7 +4761,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.InBounds = false; // Match the base operand of the GEP. - if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { + if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) { // If it couldn't be matched, just stuff the value in a register. if (AddrMode.HasBaseReg) { AddrMode = BackupAddrMode; @@ -4927,14 +5021,15 @@ static bool FindAllMemoryUses( if (CI->hasFnAttr(Attribute::Cold)) { // If this is a cold call, we can sink the addressing calculation into // the cold path. See optimizeCallInst - bool OptForSize = OptSize || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + bool OptForSize = + OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); if (!OptForSize) continue; } InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand()); - if (!IA) return true; + if (!IA) + return true; // If this is a memory operand, we're cool, otherwise bail out. if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) @@ -4954,14 +5049,16 @@ static bool FindAllMemoryUses( /// folding it into. If so, there is no cost to include it in the addressing /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the /// instruction already. -bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, +bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val, + Value *KnownLive1, Value *KnownLive2) { // If Val is either of the known-live values, we know it is live! if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) return true; // All values other than instructions and arguments (e.g. constants) are live. 
- if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; + if (!isa<Instruction>(Val) && !isa<Argument>(Val)) + return true; // If Val is a constant sized alloca in the entry block, it is live, this is // true because it is just a reference to the stack/frame pointer, which is @@ -4997,10 +5094,10 @@ bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If /// X was live across 'load Z' for other reasons, we actually *would* want to /// fold the addressing mode in the Z case. This would make Y die earlier. -bool AddressingModeMatcher:: -isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, - ExtAddrMode &AMAfter) { - if (IgnoreProfitability) return true; +bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode( + Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { + if (IgnoreProfitability) + return true; // AMBefore is the addressing mode before this instruction was folded into it, // and AMAfter is the addressing mode after the instruction was folded. Get @@ -5030,10 +5127,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // for another (at worst.) In this context, folding an addressing mode into // the use is just a particularly nice way of sinking it. SmallVector<std::pair<Value *, Type *>, 16> MemoryUses; - SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, - PSI, BFI)) - return false; // Has a non-memory, non-foldable use! + SmallPtrSet<Instruction *, 16> ConsideredInsts; + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI, + BFI)) + return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of // computation involving only operations that could theoretically be folded @@ -5044,7 +5141,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // (i.e. cold call sites), this serves as a way to prevent excessive code // growth since most architectures have some reasonable small and fast way to // compute an effective address. (i.e LEA on x86) - SmallVector<Instruction*, 32> MatchedAddrModeInsts; + SmallVector<Instruction *, 32> MatchedAddrModeInsts; for (const std::pair<Value *, Type *> &Pair : MemoryUses) { Value *Address = Pair.first; Type *AddressAccessTy = Pair.second; @@ -5064,7 +5161,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, LargeOffsetGEP, OptSize, PSI, BFI); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); - (void)Success; assert(Success && "Couldn't select *anything*?"); + (void)Success; + assert(Success && "Couldn't select *anything*?"); // The match was to check the profitability, the changes made are not // part of the original matcher. Therefore, they should be dropped @@ -5114,15 +5212,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Try to collapse single-value PHI nodes. This is necessary to undo // unprofitable PRE transformations. - SmallVector<Value*, 8> worklist; - SmallPtrSet<Value*, 16> Visited; + SmallVector<Value *, 8> worklist; + SmallPtrSet<Value *, 16> Visited; worklist.push_back(Addr); // Use a worklist to iteratively look through PHI and select nodes, and // ensure that the addressing mode obtained from the non-PHI/select roots of // the graph are compatible. 
bool PhiOrSelectSeen = false; - SmallVector<Instruction*, 16> AddrModeInsts; + SmallVector<Instruction *, 16> AddrModeInsts; const SimplifyQuery SQ(*DL, TLInfo); AddressingModeCombiner AddrModes(SQ, Addr); TypePromotionTransaction TPT(RemovedInsts); @@ -5202,12 +5300,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode AddrMode = AddrModes.getAddrMode(); // If all the instructions matched are already in this BB, don't do anything. - // If we saw a Phi node then it is not local definitely, and if we saw a select - // then we want to push the address calculation past it even if it's already - // in this BB. + // If we saw a Phi node then it is not local definitely, and if we saw a + // select then we want to push the address calculation past it even if it's + // already in this BB. if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); - })) { + })) { LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return Modified; @@ -5226,7 +5324,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; - Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); if (SunkAddr) { LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode @@ -5306,8 +5404,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } } - if (!ResultPtr && - !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { + if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale && + !AddrMode.BaseOffs) { SunkAddr = Constant::getNullValue(Addr->getType()); } else if (!ResultPtr) { return Modified; @@ -5336,7 +5434,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // done. } else { assert(cast<IntegerType>(IntPtrTy)->getBitWidth() < - cast<IntegerType>(V->getType())->getBitWidth() && + cast<IntegerType>(V->getType())->getBitWidth() && "We can't transform if ScaledReg is too narrow"); V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } @@ -5582,11 +5680,10 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, // If the final index isn't a vector, emit a scalar GEP containing all ops // and a vector GEP with all zeroes final index. if (!Ops[FinalIndex]->getType()->isVectorTy()) { - NewAddr = Builder.CreateGEP(SourceTy, Ops[0], - makeArrayRef(Ops).drop_front()); + NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front()); auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); auto *SecondTy = GetElementPtrInst::getIndexedType( - SourceTy, makeArrayRef(Ops).drop_front()); + SourceTy, ArrayRef(Ops).drop_front()); NewAddr = Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy)); } else { @@ -5597,10 +5694,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, if (Ops.size() != 2) { // Replace the last index with 0. Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); - Base = Builder.CreateGEP(SourceTy, Base, - makeArrayRef(Ops).drop_front()); + Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front()); SourceTy = GetElementPtrInst::getIndexedType( - SourceTy, makeArrayRef(Ops).drop_front()); + SourceTy, ArrayRef(Ops).drop_front()); } // Now create the GEP with scalar pointer and vector index. 
@@ -5836,7 +5932,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) { bool inserted = false; for (auto &Pt : CurPts) { if (getDT(F).dominates(Inst, Pt)) { - Pt->replaceAllUsesWith(Inst); + replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc); RemovedInsts.insert(Pt); Pt->removeFromParent(); Pt = Inst; @@ -5848,7 +5944,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) { // Give up if we need to merge in a common dominator as the // experiments show it is not profitable. continue; - Inst->replaceAllUsesWith(Pt); + replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc); RemovedInsts.insert(Inst); Inst->removeFromParent(); inserted = true; @@ -6000,7 +6096,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { if (GEP->getType() != I8PtrTy) NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); } - GEP->replaceAllUsesWith(NewGEP); + replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc); LargeOffsetGEPID.erase(GEP); LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP); GEP->eraseFromParent(); @@ -6026,6 +6122,7 @@ bool CodeGenPrepare::optimizePhiType( SmallVector<Instruction *, 4> Worklist; Worklist.push_back(cast<Instruction>(I)); SmallPtrSet<PHINode *, 4> PhiNodes; + SmallPtrSet<ConstantData *, 4> Constants; PhiNodes.insert(I); Visited.insert(I); SmallPtrSet<Instruction *, 4> Defs; @@ -6068,9 +6165,10 @@ bool CodeGenPrepare::optimizePhiType( AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) && !isa<ExtractElementInst>(OpBC->getOperand(0)); } - } else if (!isa<UndefValue>(V)) { + } else if (auto *OpC = dyn_cast<ConstantData>(V)) + Constants.insert(OpC); + else return false; - } } } @@ -6102,7 +6200,8 @@ bool CodeGenPrepare::optimizePhiType( } } - if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) + if (!ConvertTy || !AnyAnchored || + !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) return false; LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " @@ -6111,7 +6210,8 @@ bool CodeGenPrepare::optimizePhiType( // Create all the new phi nodes of the new type, and bitcast any loads to the // correct type. ValueToValueMap ValMap; - ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy); + for (ConstantData *C : Constants) + ValMap[C] = ConstantExpr::getCast(Instruction::BitCast, C, ConvertTy); for (Instruction *D : Defs) { if (isa<BitCastInst>(D)) { ValMap[D] = D->getOperand(0); @@ -6136,7 +6236,7 @@ bool CodeGenPrepare::optimizePhiType( for (Instruction *U : Uses) { if (isa<BitCastInst>(U)) { DeletedInstrs.insert(U); - U->replaceAllUsesWith(ValMap[U->getOperand(0)]); + replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc); } else { U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); @@ -6164,7 +6264,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) { // Remove any old phi's that have been converted. for (auto *I : DeletedInstrs) { - I->replaceAllUsesWith(PoisonValue::get(I->getType())); + replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc); I->eraseFromParent(); } @@ -6367,7 +6467,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { // Figure out which BB this ext is used in. 
BasicBlock *UserBB = UI->getParent(); - if (UserBB == DefBB) continue; + if (UserBB == DefBB) + continue; DefIsLiveOut = true; break; } @@ -6378,7 +6479,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { for (User *U : Src->users()) { Instruction *UI = cast<Instruction>(U); BasicBlock *UserBB = UI->getParent(); - if (UserBB == DefBB) continue; + if (UserBB == DefBB) + continue; // Be conservative. We don't want this xform to end up introducing // reloads just before load / store instructions. if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) @@ -6386,7 +6488,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { } // InsertedTruncs - Only insert one trunc in each block once. - DenseMap<BasicBlock*, Instruction*> InsertedTruncs; + DenseMap<BasicBlock *, Instruction *> InsertedTruncs; bool MadeChange = false; for (Use &U : Src->uses()) { @@ -6394,7 +6496,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { // Figure out which BB this ext is used in. BasicBlock *UserBB = User->getParent(); - if (UserBB == DefBB) continue; + if (UserBB == DefBB) + continue; // Both src and def are live in this block. Rewrite the use. Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; @@ -6576,7 +6679,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { // Replace all uses of load with new and (except for the use of load in the // new and itself). - Load->replaceAllUsesWith(NewAnd); + replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc); NewAnd->setOperand(0, Load); // Remove any and instructions that are now redundant. @@ -6584,7 +6687,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { // Check that the and mask is the same as the one we decided to put on the // new and. if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { - And->replaceAllUsesWith(NewAnd); + replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc); if (&*CurInstIterator == And) CurInstIterator = std::next(And->getIterator()); And->eraseFromParent(); @@ -6602,8 +6705,7 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { // If it's safe to speculatively execute, then it should not have side // effects; therefore, it's safe to sink and possibly *not* execute. return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && - TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= - TargetTransformInfo::TCC_Expensive; + TTI->isExpensiveToSpeculativelyExecute(I); } /// Returns true if a SelectInst should be turned into an explicit branch. @@ -6620,7 +6722,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, // If metadata tells us that the select condition is obviously predictable, // then we want to replace the select with a branch. uint64_t TrueWeight, FalseWeight; - if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { uint64_t Max = std::max(TrueWeight, FalseWeight); uint64_t Sum = TrueWeight + FalseWeight; if (Sum != 0) { @@ -6651,9 +6753,9 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, /// false value of \p SI. If the true/false value of \p SI is defined by any /// select instructions in \p Selects, look through the defining select /// instruction until the true/false value is not defined in \p Selects. 
-static Value *getTrueOrFalseValue( - SelectInst *SI, bool isTrue, - const SmallPtrSet<const Instruction *, 2> &Selects) { +static Value * +getTrueOrFalseValue(SelectInst *SI, bool isTrue, + const SmallPtrSet<const Instruction *, 2> &Selects) { Value *V = nullptr; for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); @@ -6695,7 +6797,7 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal); Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal); Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); - Shift->replaceAllUsesWith(NewSel); + replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc); Shift->eraseFromParent(); return true; } @@ -6727,10 +6829,10 @@ bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { IRBuilder<> Builder(Fsh); Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1); - Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal }); - Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal }); + Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal}); + Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal}); Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); - Fsh->replaceAllUsesWith(NewSel); + replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc); Fsh->eraseFromParent(); return true; } @@ -6741,6 +6843,10 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { if (DisableSelectToBranch) return false; + // If the SelectOptimize pass is enabled, selects have already been optimized. + if (!getCGPassBuilderOption().DisableSelectOptimize) + return false; + // Find all consecutive select instructions that share the same condition. SmallVector<SelectInst *, 2> ASI; ASI.push_back(SI); @@ -6813,6 +6919,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { BasicBlock *StartBlock = SI->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + if (IsHugeFunc) + FreshBBs.insert(EndBlock); BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); // Delete the unconditional branch that was just created by the split. 
@@ -6833,6 +6941,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", EndBlock->getParent(), EndBlock); TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + if (IsHugeFunc) + FreshBBs.insert(TrueBlock); TrueBranch->setDebugLoc(SI->getDebugLoc()); } auto *TrueInst = cast<Instruction>(SI->getTrueValue()); @@ -6842,6 +6952,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { if (FalseBlock == nullptr) { FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", EndBlock->getParent(), EndBlock); + if (IsHugeFunc) + FreshBBs.insert(FalseBlock); FalseBranch = BranchInst::Create(EndBlock, FalseBlock); FalseBranch->setDebugLoc(SI->getDebugLoc()); } @@ -6858,6 +6970,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", EndBlock->getParent(), EndBlock); + if (IsHugeFunc) + FreshBBs.insert(FalseBlock); auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); FalseBranch->setDebugLoc(SI->getDebugLoc()); } @@ -6897,7 +7011,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); PN->setDebugLoc(SI->getDebugLoc()); - SI->replaceAllUsesWith(PN); + replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc); SI->eraseFromParent(); INS.erase(SI); ++NumSelectsExpanded; @@ -6935,9 +7049,10 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1); Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); - SVI->replaceAllUsesWith(BC2); + replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc); RecursivelyDeleteTriviallyDeadInstructions( - SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); + SVI, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); // Also hoist the bitcast up to its operand if it they are not in the same // block. @@ -6987,6 +7102,18 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { for (Use *U : ToReplace) { auto *UI = cast<Instruction>(U->get()); Instruction *NI = UI->clone(); + + if (IsHugeFunc) { + // Now we clone an instruction; its operands' defs may sink to this BB + // now. So we put the operands' defs' BBs into FreshBBs to do optimization. + for (unsigned I = 0; I < NI->getNumOperands(); ++I) { + auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I)); + if (!OpDef) + continue; + FreshBBs.insert(OpDef->getParent()); + } + } + + NewInstructions[UI] = NI; MaybeDead.insert(UI); LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); @@ -7057,8 +7184,9 @@ bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) { SI->setCondition(ExtInst); for (auto Case : SI->cases()) { const APInt &NarrowConst = Case.getCaseValue()->getValue(); - APInt WideConst = (ExtType == Instruction::ZExt) ? - NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); + APInt WideConst = (ExtType == Instruction::ZExt) + ? NarrowConst.zext(RegWidth) + : NarrowConst.sext(RegWidth); Case.setValue(ConstantInt::get(Context, WideConst)); } @@ -7255,11 +7383,11 @@ class VectorPromoteHelper { // The scalar chain of computation has to pay for the transition // scalar to vector. // The vector chain has to account for the combining cost.
+ enum TargetTransformInfo::TargetCostKind CostKind = + TargetTransformInfo::TCK_RecipThroughput; InstructionCost ScalarCost = - TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); + TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index); InstructionCost VectorCost = StoreExtractCombineCost; - enum TargetTransformInfo::TargetCostKind CostKind = - TargetTransformInfo::TCK_RecipThroughput; for (const auto &Inst : InstsToBePromoted) { // Compute the cost. // By construction, all instructions being promoted are arithmetic ones. @@ -7268,17 +7396,16 @@ class VectorPromoteHelper { Value *Arg0 = Inst->getOperand(0); bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || isa<ConstantFP>(Arg0); - TargetTransformInfo::OperandValueKind Arg0OVK = - IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue - : TargetTransformInfo::OK_AnyValue; - TargetTransformInfo::OperandValueKind Arg1OVK = - !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue - : TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info; + if (IsArg0Constant) + Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue; + else + Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue; + ScalarCost += TTI.getArithmeticInstrCost( - Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK); + Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info); VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, - CostKind, - Arg0OVK, Arg1OVK); + CostKind, Arg0Info, Arg1Info); } LLVM_DEBUG( dbgs() << "Estimated cost of computation to be promoted:\nScalar: " @@ -7662,9 +7789,8 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, // type, and the second operand is a constant. static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { gep_type_iterator I = gep_type_begin(*GEP); - return GEP->getNumOperands() == 2 && - I.isSequential() && - isa<ConstantInt>(GEP->getOperand(1)); + return GEP->getNumOperands() == 2 && I.isSequential() && + isa<ConstantInt>(GEP->getOperand(1)); } // Try unmerging GEPs to reduce liveness interference (register pressure) across @@ -7737,8 +7863,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1)); // Check that GEPI is a cheap one. if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(), - TargetTransformInfo::TCK_SizeAndLatency) - > TargetTransformInfo::TCC_Basic) + TargetTransformInfo::TCK_SizeAndLatency) > + TargetTransformInfo::TCC_Basic) return false; Value *GEPIOp = GEPI->getOperand(0); // Check that GEPIOp is an instruction that's also defined in SrcBlock. @@ -7749,21 +7875,22 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return false; // Check that GEP is used outside the block, meaning it's alive on the // IndirectBr edge(s). - if (find_if(GEPI->users(), [&](User *Usr) { + if (llvm::none_of(GEPI->users(), [&](User *Usr) { if (auto *I = dyn_cast<Instruction>(Usr)) { if (I->getParent() != SrcBlock) { return true; } } return false; - }) == GEPI->users().end()) + })) return false; // The second elements of the GEP chains to be unmerged. std::vector<GetElementPtrInst *> UGEPIs; // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive // on IndirectBr edges. for (User *Usr : GEPIOp->users()) { - if (Usr == GEPI) continue; + if (Usr == GEPI) + continue; // Check if Usr is an Instruction. If not, give up. 
if (!isa<Instruction>(Usr)) return false; @@ -7787,8 +7914,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return false; ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(), - TargetTransformInfo::TCK_SizeAndLatency) - > TargetTransformInfo::TCC_Basic) + TargetTransformInfo::TCK_SizeAndLatency) > + TargetTransformInfo::TCC_Basic) return false; UGEPIs.push_back(UGEPI); } @@ -7807,9 +7934,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, for (GetElementPtrInst *UGEPI : UGEPIs) { UGEPI->setOperand(0, GEPI); ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); - Constant *NewUGEPIIdx = - ConstantInt::get(GEPIIdx->getType(), - UGEPIIdx->getValue() - GEPIIdx->getValue()); + Constant *NewUGEPIIdx = ConstantInt::get( + GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue()); UGEPI->setOperand(1, NewUGEPIIdx); // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not // inbounds to avoid UB. @@ -7827,7 +7953,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return true; } -static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) { +static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, + SmallSet<BasicBlock *, 32> &FreshBBs, + bool IsHugeFunc) { // Try and convert // %c = icmp ult %x, 8 // br %c, bla, blb @@ -7868,7 +7996,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) { ConstantInt::get(UI->getType(), 0)); LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); - Cmp->replaceAllUsesWith(NewCmp); + replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); return true; } if (Cmp->isEquality() && @@ -7881,14 +8009,14 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) { ConstantInt::get(UI->getType(), 0)); LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); - Cmp->replaceAllUsesWith(NewCmp); + replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); return true; } } return false; } -bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -7901,7 +8029,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // trivial PHI, go ahead and zap it here. 
if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) { LargeOffsetGEPMap.erase(P); - P->replaceAllUsesWith(V); + replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc); P->eraseFromParent(); ++NumPHIsElim; return true; @@ -7922,6 +8050,11 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { if (OptimizeNoopCopyExpression(CI, *TLI, *DL)) return true; + if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) && + TLI->optimizeExtendOrTruncateConversion(I, + LI->getLoopFor(I->getParent()))) + return true; + if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { /// Sink a zext or sext into its user blocks if the target type doesn't /// fit in one register @@ -7930,6 +8063,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { + if (TLI->optimizeExtendOrTruncateConversion( + I, LI->getLoopFor(I->getParent()))) + return true; + bool MadeChange = optimizeExt(I); return MadeChange | optimizeExtUses(I); } @@ -7959,15 +8096,14 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { } if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { - unsigned AS = RMW->getPointerAddressSpace(); - return optimizeMemoryInst(I, RMW->getPointerOperand(), - RMW->getType(), AS); + unsigned AS = RMW->getPointerAddressSpace(); + return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS); } if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) { - unsigned AS = CmpX->getPointerAddressSpace(); - return optimizeMemoryInst(I, CmpX->getPointerOperand(), - CmpX->getCompareOperand()->getType(), AS); + unsigned AS = CmpX->getPointerAddressSpace(); + return optimizeMemoryInst(I, CmpX->getPointerOperand(), + CmpX->getCompareOperand()->getType(), AS); } BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); @@ -7991,7 +8127,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), GEPI->getName(), GEPI); NC->setDebugLoc(GEPI->getDebugLoc()); - GEPI->replaceAllUsesWith(NC); + replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc); GEPI->eraseFromParent(); ++NumGEPsElim; optimizeInst(NC, ModifiedDT); @@ -8024,7 +8160,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { F->takeName(FI); CmpI->setOperand(Const0 ? 
1 : 0, F); } - FI->replaceAllUsesWith(CmpI); + replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc); FI->eraseFromParent(); return true; } @@ -8051,7 +8187,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { case Instruction::ExtractElement: return optimizeExtractElementInst(cast<ExtractElementInst>(I)); case Instruction::Br: - return optimizeBranch(cast<BranchInst>(I), *TLI); + return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc); } return false; @@ -8065,29 +8201,43 @@ bool CodeGenPrepare::makeBitReverse(Instruction &I) { TLI->getValueType(*DL, I.getType(), true))) return false; - SmallVector<Instruction*, 4> Insts; + SmallVector<Instruction *, 4> Insts; if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) return false; Instruction *LastInst = Insts.back(); - I.replaceAllUsesWith(LastInst); + replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc); RecursivelyDeleteTriviallyDeadInstructions( - &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); + &I, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); return true; } // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; - CurInstIterator = BB.begin(); - while (CurInstIterator != BB.end()) { - MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); - if (ModifiedDT) - return true; - } + do { + CurInstIterator = BB.begin(); + ModifiedDT = ModifyDT::NotModifyDT; + while (CurInstIterator != BB.end()) { + MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); + if (ModifiedDT != ModifyDT::NotModifyDT) { + // For huge function we tend to quickly go though the inner optmization + // opportunities in the BB. So we go back to the BB head to re-optimize + // each instruction instead of go back to the function head. + if (IsHugeFunc) { + DT.reset(); + getDT(*BB.getParent()); + break; + } else { + return true; + } + } + } + } while (ModifiedDT == ModifyDT::ModifyInstDT); bool MadeBitReverse = true; while (MadeBitReverse) { @@ -8176,7 +8326,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { dbgs() << "Unable to find valid location for Debug Value, undefing:\n" << *DVI); - DVI->setUndef(); + DVI->setKillLocation(); break; } @@ -8247,7 +8397,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. /// -bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { +bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) { if (!TM->Options.EnableFastISel || TLI->isJumpExpensive()) return false; @@ -8298,6 +8448,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { auto *TmpBB = BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", BB.getParent(), BB.getNextNode()); + if (IsHugeFunc) + FreshBBs.insert(TmpBB); // Update original basic block by using the first condition directly by the // branch instruction and removing the no longer needed and/or instruction. @@ -8333,7 +8485,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { // Replace the old BB with the new BB. TBB->replacePhiUsesWith(&BB, TmpBB); - // Add another incoming edge form the new BB. 
+ // Add another incoming edge from the new BB. for (PHINode &PN : FBB->phis()) { auto *Val = PN.getIncomingValueForBlock(&BB); PN.addIncoming(Val, TmpBB); @@ -8362,18 +8514,20 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. uint64_t TrueWeight, FalseWeight; - if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) { uint64_t NewTrueWeight = TrueWeight; uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; scaleWeights(NewTrueWeight, NewFalseWeight); - Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) - .createBranchWeights(TrueWeight, FalseWeight)); + Br1->setMetadata(LLVMContext::MD_prof, + MDBuilder(Br1->getContext()) + .createBranchWeights(TrueWeight, FalseWeight)); NewTrueWeight = TrueWeight; NewFalseWeight = 2 * FalseWeight; scaleWeights(NewTrueWeight, NewFalseWeight); - Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) - .createBranchWeights(TrueWeight, FalseWeight)); + Br2->setMetadata(LLVMContext::MD_prof, + MDBuilder(Br2->getContext()) + .createBranchWeights(TrueWeight, FalseWeight)); } } else { // Codegen X & Y as: @@ -8395,22 +8549,24 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { // assumes that // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. uint64_t TrueWeight, FalseWeight; - if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) { uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; uint64_t NewFalseWeight = FalseWeight; scaleWeights(NewTrueWeight, NewFalseWeight); - Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) - .createBranchWeights(TrueWeight, FalseWeight)); + Br1->setMetadata(LLVMContext::MD_prof, + MDBuilder(Br1->getContext()) + .createBranchWeights(TrueWeight, FalseWeight)); NewTrueWeight = 2 * TrueWeight; NewFalseWeight = FalseWeight; scaleWeights(NewTrueWeight, NewFalseWeight); - Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) - .createBranchWeights(TrueWeight, FalseWeight)); + Br2->setMetadata(LLVMContext::MD_prof, + MDBuilder(Br2->getContext()) + .createBranchWeights(TrueWeight, FalseWeight)); } } - ModifiedDT = true; + ModifiedDT = ModifyDT::ModifyBBDT; MadeChange = true; LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp index fd52191882cb..48cd8e998ec9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" +#include <optional> using namespace llvm; @@ -40,14 +41,15 @@ using namespace llvm; return *NAME##View; \ } +// Temporary macro for incremental transition to std::optional. 
#define CGOPT_EXP(TY, NAME) \ CGOPT(TY, NAME) \ - Optional<TY> codegen::getExplicit##NAME() { \ + std::optional<TY> codegen::getExplicit##NAME() { \ if (NAME##View->getNumOccurrences()) { \ TY res = *NAME##View; \ return res; \ } \ - return None; \ + return std::nullopt; \ } CGOPT(std::string, MArch) @@ -357,7 +359,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { "relax-elf-relocations", cl::desc( "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), - cl::init(false)); + cl::init(true)); CGBINDOPT(RelaxELFRelocations); static cl::opt<bool> DataSections( @@ -590,8 +592,8 @@ std::string codegen::getFeaturesStr() { if (getMCPU() == "native") { StringMap<bool> HostFeatures; if (sys::getHostCPUFeatures(HostFeatures)) - for (auto &F : HostFeatures) - Features.AddFeature(F.first(), F.second); + for (const auto &[Feature, IsEnabled] : HostFeatures) + Features.AddFeature(Feature, IsEnabled); } for (auto const &MAttr : getMAttrs()) @@ -610,8 +612,8 @@ std::vector<std::string> codegen::getFeatureList() { if (getMCPU() == "native") { StringMap<bool> HostFeatures; if (sys::getHostCPUFeatures(HostFeatures)) - for (auto &F : HostFeatures) - Features.AddFeature(F.first(), F.second); + for (const auto &[Feature, IsEnabled] : HostFeatures) + Features.AddFeature(Feature, IsEnabled); } for (auto const &MAttr : getMAttrs()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp new file mode 100644 index 000000000000..9b1f7117fa57 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -0,0 +1,889 @@ +//===- ComplexDeinterleavingPass.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Identification: +// This step is responsible for finding the patterns that can be lowered to +// complex instructions, and building a graph to represent the complex +// structures. Starting from the "Converging Shuffle" (a shuffle that +// reinterleaves the complex components, with a mask of <0, 2, 1, 3>), the +// operands are evaluated and identified as "Composite Nodes" (collections of +// instructions that can potentially be lowered to a single complex +// instruction). This is performed by checking the real and imaginary components +// and tracking the data flow for each component while following the operand +// pairs. Validity of each node is expected to be done upon creation, and any +// validation errors should halt traversal and prevent further graph +// construction. +// +// Replacement: +// This step traverses the graph built up by identification, delegating to the +// target to validate and generate the correct intrinsics, and plumbs them +// together connecting each end of the new intrinsics graph to the existing +// use-def chain. This step is assumed to finish successfully, as all +// information is expected to be correct by this point. +// +// +// Internal data structure: +// ComplexDeinterleavingGraph: +// Keeps references to all the valid CompositeNodes formed as part of the +// transformation, and every Instruction contained within said nodes. It also +// holds onto a reference to the root Instruction, and the root node that should +// replace it. 
+// +// ComplexDeinterleavingCompositeNode: +// A CompositeNode represents a single transformation point; each node should +// transform into a single complex instruction (ignoring vector splitting, which +// would generate more instructions per node). They are identified in a +// depth-first manner, traversing and identifying the operands of each +// instruction in the order they appear in the IR. +// Each node maintains a reference to its Real and Imaginary instructions, +// as well as any additional instructions that make up the identified operation +// (Internal instructions should only have uses within their containing node). +// A Node also contains the rotation and operation type that it represents. +// Operands contains pointers to other CompositeNodes, acting as the edges in +// the graph. ReplacementValue is the transformed Value* that has been emitted +// to the IR. +// +// Note: If the operation of a Node is Shuffle, only the Real, Imaginary, and +// ReplacementValue fields of that Node are relevant, where the ReplacementValue +// should be pre-populated. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ComplexDeinterleavingPass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "complex-deinterleaving" + +STATISTIC(NumComplexTransformations, "Amount of complex patterns transformed"); + +static cl::opt<bool> ComplexDeinterleavingEnabled( + "enable-complex-deinterleaving", + cl::desc("Enable generation of complex instructions"), cl::init(true), + cl::Hidden); + +/// Checks the given mask, and determines whether said mask is interleaving. +/// +/// To be interleaving, a mask must alternate between `i` and `i + (Length / +/// 2)`, and must contain all numbers within the range of `[0..Length)` (e.g. a +/// 4x vector interleaving mask would be <0, 2, 1, 3>). +static bool isInterleavingMask(ArrayRef<int> Mask); + +/// Checks the given mask, and determines whether said mask is deinterleaving. +/// +/// To be deinterleaving, a mask must increment in steps of 2, and either start +/// with 0 or 1. +/// (e.g. an 8x vector deinterleaving mask would be either <0, 2, 4, 6> or +/// <1, 3, 5, 7>). 
+static bool isDeinterleavingMask(ArrayRef<int> Mask); + +namespace { + +class ComplexDeinterleavingLegacyPass : public FunctionPass { +public: + static char ID; + + ComplexDeinterleavingLegacyPass(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) { + initializeComplexDeinterleavingLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Complex Deinterleaving Pass"; + } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesCFG(); + } + +private: + const TargetMachine *TM; +}; + +class ComplexDeinterleavingGraph; +struct ComplexDeinterleavingCompositeNode { + + ComplexDeinterleavingCompositeNode(ComplexDeinterleavingOperation Op, + Instruction *R, Instruction *I) + : Operation(Op), Real(R), Imag(I) {} + +private: + friend class ComplexDeinterleavingGraph; + using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>; + using RawNodePtr = ComplexDeinterleavingCompositeNode *; + +public: + ComplexDeinterleavingOperation Operation; + Instruction *Real; + Instruction *Imag; + + // Instructions that should only exist within this node, there should be no + // users of these instructions outside the node. An example of these would be + // the multiply instructions of a partial multiply operation. + SmallVector<Instruction *> InternalInstructions; + ComplexDeinterleavingRotation Rotation; + SmallVector<RawNodePtr> Operands; + Value *ReplacementNode = nullptr; + + void addInstruction(Instruction *I) { InternalInstructions.push_back(I); } + void addOperand(NodePtr Node) { Operands.push_back(Node.get()); } + + bool hasAllInternalUses(SmallPtrSet<Instruction *, 16> &AllInstructions); + + void dump() { dump(dbgs()); } + void dump(raw_ostream &OS) { + auto PrintValue = [&](Value *V) { + if (V) { + OS << "\""; + V->print(OS, true); + OS << "\"\n"; + } else + OS << "nullptr\n"; + }; + auto PrintNodeRef = [&](RawNodePtr Ptr) { + if (Ptr) + OS << Ptr << "\n"; + else + OS << "nullptr\n"; + }; + + OS << "- CompositeNode: " << this << "\n"; + OS << " Real: "; + PrintValue(Real); + OS << " Imag: "; + PrintValue(Imag); + OS << " ReplacementNode: "; + PrintValue(ReplacementNode); + OS << " Operation: " << (int)Operation << "\n"; + OS << " Rotation: " << ((int)Rotation * 90) << "\n"; + OS << " Operands: \n"; + for (const auto &Op : Operands) { + OS << " - "; + PrintNodeRef(Op); + } + OS << " InternalInstructions:\n"; + for (const auto &I : InternalInstructions) { + OS << " - \""; + I->print(OS, true); + OS << "\"\n"; + } + } +}; + +class ComplexDeinterleavingGraph { +public: + using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr; + using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr; + explicit ComplexDeinterleavingGraph(const TargetLowering *tl) : TL(tl) {} + +private: + const TargetLowering *TL; + Instruction *RootValue; + NodePtr RootNode; + SmallVector<NodePtr> CompositeNodes; + SmallPtrSet<Instruction *, 16> AllInstructions; + + NodePtr prepareCompositeNode(ComplexDeinterleavingOperation Operation, + Instruction *R, Instruction *I) { + return std::make_shared<ComplexDeinterleavingCompositeNode>(Operation, R, + I); + } + + NodePtr submitCompositeNode(NodePtr Node) { + CompositeNodes.push_back(Node); + AllInstructions.insert(Node->Real); + AllInstructions.insert(Node->Imag); + for (auto *I : Node->InternalInstructions) + AllInstructions.insert(I); + return Node; + } + + NodePtr 
getContainingComposite(Value *R, Value *I) { + for (const auto &CN : CompositeNodes) { + if (CN->Real == R && CN->Imag == I) + return CN; + } + return nullptr; + } + + /// Identifies a complex partial multiply pattern and its rotation, based on + /// the following patterns + /// + /// 0: r: cr + ar * br + /// i: ci + ar * bi + /// 90: r: cr - ai * bi + /// i: ci + ai * br + /// 180: r: cr - ar * br + /// i: ci - ar * bi + /// 270: r: cr + ai * bi + /// i: ci - ai * br + NodePtr identifyPartialMul(Instruction *Real, Instruction *Imag); + + /// Identify the other branch of a Partial Mul, taking the CommonOperandI that + /// is partially known from identifyPartialMul, filling in the other half of + /// the complex pair. + NodePtr identifyNodeWithImplicitAdd( + Instruction *I, Instruction *J, + std::pair<Instruction *, Instruction *> &CommonOperandI); + + /// Identifies a complex add pattern and its rotation, based on the following + /// patterns. + /// + /// 90: r: ar - bi + /// i: ai + br + /// 270: r: ar + bi + /// i: ai - br + NodePtr identifyAdd(Instruction *Real, Instruction *Imag); + + NodePtr identifyNode(Instruction *I, Instruction *J); + + Value *replaceNode(RawNodePtr Node); + +public: + void dump() { dump(dbgs()); } + void dump(raw_ostream &OS) { + for (const auto &Node : CompositeNodes) + Node->dump(OS); + } + + /// Returns false if the deinterleaving operation should be cancelled for the + /// current graph. + bool identifyNodes(Instruction *RootI); + + /// Perform the actual replacement of the underlying instruction graph. + /// Returns false if the deinterleaving operation should be cancelled for the + /// current graph. + void replaceNodes(); +}; + +class ComplexDeinterleaving { +public: + ComplexDeinterleaving(const TargetLowering *tl, const TargetLibraryInfo *tli) + : TL(tl), TLI(tli) {} + bool runOnFunction(Function &F); + +private: + bool evaluateBasicBlock(BasicBlock *B); + + const TargetLowering *TL = nullptr; + const TargetLibraryInfo *TLI = nullptr; +}; + +} // namespace + +char ComplexDeinterleavingLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(ComplexDeinterleavingLegacyPass, DEBUG_TYPE, + "Complex Deinterleaving", false, false) +INITIALIZE_PASS_END(ComplexDeinterleavingLegacyPass, DEBUG_TYPE, + "Complex Deinterleaving", false, false) + +PreservedAnalyses ComplexDeinterleavingPass::run(Function &F, + FunctionAnalysisManager &AM) { + const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering(); + auto &TLI = AM.getResult<llvm::TargetLibraryAnalysis>(F); + if (!ComplexDeinterleaving(TL, &TLI).runOnFunction(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve<FunctionAnalysisManagerModuleProxy>(); + return PA; +} + +FunctionPass *llvm::createComplexDeinterleavingPass(const TargetMachine *TM) { + return new ComplexDeinterleavingLegacyPass(TM); +} + +bool ComplexDeinterleavingLegacyPass::runOnFunction(Function &F) { + const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); + auto TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + return ComplexDeinterleaving(TL, &TLI).runOnFunction(F); +} + +bool ComplexDeinterleaving::runOnFunction(Function &F) { + if (!ComplexDeinterleavingEnabled) { + LLVM_DEBUG( + dbgs() << "Complex deinterleaving has been explicitly disabled.\n"); + return false; + } + + if (!TL->isComplexDeinterleavingSupported()) { + LLVM_DEBUG( + dbgs() << "Complex deinterleaving has been disabled, target does " + "not support lowering of complex number operations.\n"); + return false; + } + + bool Changed = 
false; + for (auto &B : F) + Changed |= evaluateBasicBlock(&B); + + return Changed; +} + +static bool isInterleavingMask(ArrayRef<int> Mask) { + // If the size is not even, it's not an interleaving mask + if ((Mask.size() & 1)) + return false; + + int HalfNumElements = Mask.size() / 2; + for (int Idx = 0; Idx < HalfNumElements; ++Idx) { + int MaskIdx = Idx * 2; + if (Mask[MaskIdx] != Idx || Mask[MaskIdx + 1] != (Idx + HalfNumElements)) + return false; + } + + return true; +} + +static bool isDeinterleavingMask(ArrayRef<int> Mask) { + int Offset = Mask[0]; + int HalfNumElements = Mask.size() / 2; + + for (int Idx = 1; Idx < HalfNumElements; ++Idx) { + if (Mask[Idx] != (Idx * 2) + Offset) + return false; + } + + return true; +} + +bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) { + bool Changed = false; + + SmallVector<Instruction *> DeadInstrRoots; + + for (auto &I : *B) { + auto *SVI = dyn_cast<ShuffleVectorInst>(&I); + if (!SVI) + continue; + + // Look for a shufflevector that takes separate vectors of the real and + // imaginary components and recombines them into a single vector. + if (!isInterleavingMask(SVI->getShuffleMask())) + continue; + + ComplexDeinterleavingGraph Graph(TL); + if (!Graph.identifyNodes(SVI)) + continue; + + Graph.replaceNodes(); + DeadInstrRoots.push_back(SVI); + Changed = true; + } + + for (const auto &I : DeadInstrRoots) { + if (!I || I->getParent() == nullptr) + continue; + llvm::RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + } + + return Changed; +} + +ComplexDeinterleavingGraph::NodePtr +ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd( + Instruction *Real, Instruction *Imag, + std::pair<Instruction *, Instruction *> &PartialMatch) { + LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << *Real << " / " << *Imag + << "\n"); + + if (!Real->hasOneUse() || !Imag->hasOneUse()) { + LLVM_DEBUG(dbgs() << " - Mul operand has multiple uses.\n"); + return nullptr; + } + + if (Real->getOpcode() != Instruction::FMul || + Imag->getOpcode() != Instruction::FMul) { + LLVM_DEBUG(dbgs() << " - Real or imaginary instruction is not fmul\n"); + return nullptr; + } + + Instruction *R0 = dyn_cast<Instruction>(Real->getOperand(0)); + Instruction *R1 = dyn_cast<Instruction>(Real->getOperand(1)); + Instruction *I0 = dyn_cast<Instruction>(Imag->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(Imag->getOperand(1)); + if (!R0 || !R1 || !I0 || !I1) { + LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n"); + return nullptr; + } + + // A +/+ has a rotation of 0. If any of the operands are fneg, we flip the + // rotations and use the operand. 
+ unsigned Negs = 0; + SmallVector<Instruction *> FNegs; + if (R0->getOpcode() == Instruction::FNeg || + R1->getOpcode() == Instruction::FNeg) { + Negs |= 1; + if (R0->getOpcode() == Instruction::FNeg) { + FNegs.push_back(R0); + R0 = dyn_cast<Instruction>(R0->getOperand(0)); + } else { + FNegs.push_back(R1); + R1 = dyn_cast<Instruction>(R1->getOperand(0)); + } + if (!R0 || !R1) + return nullptr; + } + if (I0->getOpcode() == Instruction::FNeg || + I1->getOpcode() == Instruction::FNeg) { + Negs |= 2; + Negs ^= 1; + if (I0->getOpcode() == Instruction::FNeg) { + FNegs.push_back(I0); + I0 = dyn_cast<Instruction>(I0->getOperand(0)); + } else { + FNegs.push_back(I1); + I1 = dyn_cast<Instruction>(I1->getOperand(0)); + } + if (!I0 || !I1) + return nullptr; + } + + ComplexDeinterleavingRotation Rotation = (ComplexDeinterleavingRotation)Negs; + + Instruction *CommonOperand; + Instruction *UncommonRealOp; + Instruction *UncommonImagOp; + + if (R0 == I0 || R0 == I1) { + CommonOperand = R0; + UncommonRealOp = R1; + } else if (R1 == I0 || R1 == I1) { + CommonOperand = R1; + UncommonRealOp = R0; + } else { + LLVM_DEBUG(dbgs() << " - No equal operand\n"); + return nullptr; + } + + UncommonImagOp = (CommonOperand == I0) ? I1 : I0; + if (Rotation == ComplexDeinterleavingRotation::Rotation_90 || + Rotation == ComplexDeinterleavingRotation::Rotation_270) + std::swap(UncommonRealOp, UncommonImagOp); + + // Between identifyPartialMul and here we need to have found a complete valid + // pair from the CommonOperand of each part. + if (Rotation == ComplexDeinterleavingRotation::Rotation_0 || + Rotation == ComplexDeinterleavingRotation::Rotation_180) + PartialMatch.first = CommonOperand; + else + PartialMatch.second = CommonOperand; + + if (!PartialMatch.first || !PartialMatch.second) { + LLVM_DEBUG(dbgs() << " - Incomplete partial match\n"); + return nullptr; + } + + NodePtr CommonNode = identifyNode(PartialMatch.first, PartialMatch.second); + if (!CommonNode) { + LLVM_DEBUG(dbgs() << " - No CommonNode identified\n"); + return nullptr; + } + + NodePtr UncommonNode = identifyNode(UncommonRealOp, UncommonImagOp); + if (!UncommonNode) { + LLVM_DEBUG(dbgs() << " - No UncommonNode identified\n"); + return nullptr; + } + + NodePtr Node = prepareCompositeNode( + ComplexDeinterleavingOperation::CMulPartial, Real, Imag); + Node->Rotation = Rotation; + Node->addOperand(CommonNode); + Node->addOperand(UncommonNode); + Node->InternalInstructions.append(FNegs); + return submitCompositeNode(Node); +} + +ComplexDeinterleavingGraph::NodePtr +ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real, + Instruction *Imag) { + LLVM_DEBUG(dbgs() << "identifyPartialMul " << *Real << " / " << *Imag + << "\n"); + // Determine rotation + ComplexDeinterleavingRotation Rotation; + if (Real->getOpcode() == Instruction::FAdd && + Imag->getOpcode() == Instruction::FAdd) + Rotation = ComplexDeinterleavingRotation::Rotation_0; + else if (Real->getOpcode() == Instruction::FSub && + Imag->getOpcode() == Instruction::FAdd) + Rotation = ComplexDeinterleavingRotation::Rotation_90; + else if (Real->getOpcode() == Instruction::FSub && + Imag->getOpcode() == Instruction::FSub) + Rotation = ComplexDeinterleavingRotation::Rotation_180; + else if (Real->getOpcode() == Instruction::FAdd && + Imag->getOpcode() == Instruction::FSub) + Rotation = ComplexDeinterleavingRotation::Rotation_270; + else { + LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n"); + return nullptr; + } + + if (!Real->getFastMathFlags().allowContract() || + 
!Imag->getFastMathFlags().allowContract()) { + LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n"); + return nullptr; + } + + Value *CR = Real->getOperand(0); + Instruction *RealMulI = dyn_cast<Instruction>(Real->getOperand(1)); + if (!RealMulI) + return nullptr; + Value *CI = Imag->getOperand(0); + Instruction *ImagMulI = dyn_cast<Instruction>(Imag->getOperand(1)); + if (!ImagMulI) + return nullptr; + + if (!RealMulI->hasOneUse() || !ImagMulI->hasOneUse()) { + LLVM_DEBUG(dbgs() << " - Mul instruction has multiple uses\n"); + return nullptr; + } + + Instruction *R0 = dyn_cast<Instruction>(RealMulI->getOperand(0)); + Instruction *R1 = dyn_cast<Instruction>(RealMulI->getOperand(1)); + Instruction *I0 = dyn_cast<Instruction>(ImagMulI->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(ImagMulI->getOperand(1)); + if (!R0 || !R1 || !I0 || !I1) { + LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n"); + return nullptr; + } + + Instruction *CommonOperand; + Instruction *UncommonRealOp; + Instruction *UncommonImagOp; + + if (R0 == I0 || R0 == I1) { + CommonOperand = R0; + UncommonRealOp = R1; + } else if (R1 == I0 || R1 == I1) { + CommonOperand = R1; + UncommonRealOp = R0; + } else { + LLVM_DEBUG(dbgs() << " - No equal operand\n"); + return nullptr; + } + + UncommonImagOp = (CommonOperand == I0) ? I1 : I0; + if (Rotation == ComplexDeinterleavingRotation::Rotation_90 || + Rotation == ComplexDeinterleavingRotation::Rotation_270) + std::swap(UncommonRealOp, UncommonImagOp); + + std::pair<Instruction *, Instruction *> PartialMatch( + (Rotation == ComplexDeinterleavingRotation::Rotation_0 || + Rotation == ComplexDeinterleavingRotation::Rotation_180) + ? CommonOperand + : nullptr, + (Rotation == ComplexDeinterleavingRotation::Rotation_90 || + Rotation == ComplexDeinterleavingRotation::Rotation_270) + ? 
CommonOperand + : nullptr); + NodePtr CNode = identifyNodeWithImplicitAdd( + cast<Instruction>(CR), cast<Instruction>(CI), PartialMatch); + if (!CNode) { + LLVM_DEBUG(dbgs() << " - No cnode identified\n"); + return nullptr; + } + + NodePtr UncommonRes = identifyNode(UncommonRealOp, UncommonImagOp); + if (!UncommonRes) { + LLVM_DEBUG(dbgs() << " - No UncommonRes identified\n"); + return nullptr; + } + + assert(PartialMatch.first && PartialMatch.second); + NodePtr CommonRes = identifyNode(PartialMatch.first, PartialMatch.second); + if (!CommonRes) { + LLVM_DEBUG(dbgs() << " - No CommonRes identified\n"); + return nullptr; + } + + NodePtr Node = prepareCompositeNode( + ComplexDeinterleavingOperation::CMulPartial, Real, Imag); + Node->addInstruction(RealMulI); + Node->addInstruction(ImagMulI); + Node->Rotation = Rotation; + Node->addOperand(CommonRes); + Node->addOperand(UncommonRes); + Node->addOperand(CNode); + return submitCompositeNode(Node); +} + +ComplexDeinterleavingGraph::NodePtr +ComplexDeinterleavingGraph::identifyAdd(Instruction *Real, Instruction *Imag) { + LLVM_DEBUG(dbgs() << "identifyAdd " << *Real << " / " << *Imag << "\n"); + + // Determine rotation + ComplexDeinterleavingRotation Rotation; + if ((Real->getOpcode() == Instruction::FSub && + Imag->getOpcode() == Instruction::FAdd) || + (Real->getOpcode() == Instruction::Sub && + Imag->getOpcode() == Instruction::Add)) + Rotation = ComplexDeinterleavingRotation::Rotation_90; + else if ((Real->getOpcode() == Instruction::FAdd && + Imag->getOpcode() == Instruction::FSub) || + (Real->getOpcode() == Instruction::Add && + Imag->getOpcode() == Instruction::Sub)) + Rotation = ComplexDeinterleavingRotation::Rotation_270; + else { + LLVM_DEBUG(dbgs() << " - Unhandled case, rotation is not assigned.\n"); + return nullptr; + } + + auto *AR = dyn_cast<Instruction>(Real->getOperand(0)); + auto *BI = dyn_cast<Instruction>(Real->getOperand(1)); + auto *AI = dyn_cast<Instruction>(Imag->getOperand(0)); + auto *BR = dyn_cast<Instruction>(Imag->getOperand(1)); + + if (!AR || !AI || !BR || !BI) { + LLVM_DEBUG(dbgs() << " - Not all operands are instructions.\n"); + return nullptr; + } + + NodePtr ResA = identifyNode(AR, AI); + if (!ResA) { + LLVM_DEBUG(dbgs() << " - AR/AI is not identified as a composite node.\n"); + return nullptr; + } + NodePtr ResB = identifyNode(BR, BI); + if (!ResB) { + LLVM_DEBUG(dbgs() << " - BR/BI is not identified as a composite node.\n"); + return nullptr; + } + + NodePtr Node = + prepareCompositeNode(ComplexDeinterleavingOperation::CAdd, Real, Imag); + Node->Rotation = Rotation; + Node->addOperand(ResA); + Node->addOperand(ResB); + return submitCompositeNode(Node); +} + +static bool isInstructionPairAdd(Instruction *A, Instruction *B) { + unsigned OpcA = A->getOpcode(); + unsigned OpcB = B->getOpcode(); + + return (OpcA == Instruction::FSub && OpcB == Instruction::FAdd) || + (OpcA == Instruction::FAdd && OpcB == Instruction::FSub) || + (OpcA == Instruction::Sub && OpcB == Instruction::Add) || + (OpcA == Instruction::Add && OpcB == Instruction::Sub); +} + +static bool isInstructionPairMul(Instruction *A, Instruction *B) { + auto Pattern = + m_BinOp(m_FMul(m_Value(), m_Value()), m_FMul(m_Value(), m_Value())); + + return match(A, Pattern) && match(B, Pattern); +} + +ComplexDeinterleavingGraph::NodePtr +ComplexDeinterleavingGraph::identifyNode(Instruction *Real, Instruction *Imag) { + LLVM_DEBUG(dbgs() << "identifyNode on " << *Real << " / " << *Imag << "\n"); + if (NodePtr CN = getContainingComposite(Real, Imag)) { + 
LLVM_DEBUG(dbgs() << " - Folding to existing node\n"); + return CN; + } + + auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real); + auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag); + if (RealShuffle && ImagShuffle) { + Value *RealOp1 = RealShuffle->getOperand(1); + if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) { + LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n"); + return nullptr; + } + Value *ImagOp1 = ImagShuffle->getOperand(1); + if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) { + LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n"); + return nullptr; + } + + Value *RealOp0 = RealShuffle->getOperand(0); + Value *ImagOp0 = ImagShuffle->getOperand(0); + + if (RealOp0 != ImagOp0) { + LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n"); + return nullptr; + } + + ArrayRef<int> RealMask = RealShuffle->getShuffleMask(); + ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask(); + if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) { + LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n"); + return nullptr; + } + + if (RealMask[0] != 0 || ImagMask[0] != 1) { + LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n"); + return nullptr; + } + + // Type checking, the shuffle type should be a vector type of the same + // scalar type, but half the size + auto CheckType = [&](ShuffleVectorInst *Shuffle) { + Value *Op = Shuffle->getOperand(0); + auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType()); + auto *OpTy = cast<FixedVectorType>(Op->getType()); + + if (OpTy->getScalarType() != ShuffleTy->getScalarType()) + return false; + if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements()) + return false; + + return true; + }; + + auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool { + if (!CheckType(Shuffle)) + return false; + + ArrayRef<int> Mask = Shuffle->getShuffleMask(); + int Last = *Mask.rbegin(); + + Value *Op = Shuffle->getOperand(0); + auto *OpTy = cast<FixedVectorType>(Op->getType()); + int NumElements = OpTy->getNumElements(); + + // Ensure that the deinterleaving shuffle only pulls from the first + // shuffle operand. 
+ return Last < NumElements; + }; + + if (RealShuffle->getType() != ImagShuffle->getType()) { + LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n"); + return nullptr; + } + if (!CheckDeinterleavingShuffle(RealShuffle)) { + LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n"); + return nullptr; + } + if (!CheckDeinterleavingShuffle(ImagShuffle)) { + LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n"); + return nullptr; + } + + NodePtr PlaceholderNode = + prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Shuffle, + RealShuffle, ImagShuffle); + PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0); + return submitCompositeNode(PlaceholderNode); + } + if (RealShuffle || ImagShuffle) + return nullptr; + + auto *VTy = cast<FixedVectorType>(Real->getType()); + auto *NewVTy = + FixedVectorType::get(VTy->getScalarType(), VTy->getNumElements() * 2); + + if (TL->isComplexDeinterleavingOperationSupported( + ComplexDeinterleavingOperation::CMulPartial, NewVTy) && + isInstructionPairMul(Real, Imag)) { + return identifyPartialMul(Real, Imag); + } + + if (TL->isComplexDeinterleavingOperationSupported( + ComplexDeinterleavingOperation::CAdd, NewVTy) && + isInstructionPairAdd(Real, Imag)) { + return identifyAdd(Real, Imag); + } + + return nullptr; +} + +bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) { + Instruction *Real; + Instruction *Imag; + if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag)))) + return false; + + RootValue = RootI; + AllInstructions.insert(RootI); + RootNode = identifyNode(Real, Imag); + + LLVM_DEBUG({ + Function *F = RootI->getFunction(); + BasicBlock *B = RootI->getParent(); + dbgs() << "Complex deinterleaving graph for " << F->getName() + << "::" << B->getName() << ".\n"; + dump(dbgs()); + dbgs() << "\n"; + }); + + // Check all instructions have internal uses + for (const auto &Node : CompositeNodes) { + if (!Node->hasAllInternalUses(AllInstructions)) { + LLVM_DEBUG(dbgs() << " - Invalid internal uses\n"); + return false; + } + } + return RootNode != nullptr; +} + +Value *ComplexDeinterleavingGraph::replaceNode( + ComplexDeinterleavingGraph::RawNodePtr Node) { + if (Node->ReplacementNode) + return Node->ReplacementNode; + + Value *Input0 = replaceNode(Node->Operands[0]); + Value *Input1 = replaceNode(Node->Operands[1]); + Value *Accumulator = + Node->Operands.size() > 2 ? 
replaceNode(Node->Operands[2]) : nullptr; + + assert(Input0->getType() == Input1->getType() && + "Node inputs need to be of the same type"); + + Node->ReplacementNode = TL->createComplexDeinterleavingIR( + Node->Real, Node->Operation, Node->Rotation, Input0, Input1, Accumulator); + + assert(Node->ReplacementNode && "Target failed to create Intrinsic call."); + NumComplexTransformations += 1; + return Node->ReplacementNode; +} + +void ComplexDeinterleavingGraph::replaceNodes() { + Value *R = replaceNode(RootNode.get()); + assert(R && "Unable to find replacement for RootValue"); + RootValue->replaceAllUsesWith(R); +} + +bool ComplexDeinterleavingCompositeNode::hasAllInternalUses( + SmallPtrSet<Instruction *, 16> &AllInstructions) { + if (Operation == ComplexDeinterleavingOperation::Shuffle) + return true; + + for (auto *User : Real->users()) { + if (!AllInstructions.contains(cast<Instruction>(User))) + return false; + } + for (auto *User : Imag->users()) { + if (!AllInstructions.contains(cast<Instruction>(User))) + return false; + } + for (auto *I : InternalInstructions) { + for (auto *User : I->users()) { + if (!AllInstructions.contains(cast<Instruction>(User))) + return false; + } + } + return true; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index ce00be634e9a..e36db43567c5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -30,10 +31,9 @@ namespace { class DeadMachineInstructionElim : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; - const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - BitVector LivePhysRegs; + LiveRegUnits LivePhysRegs; public: static char ID; // Pass identification, replacement for typeid @@ -78,15 +78,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { // Don't delete live physreg defs, or any reserved register defs. - if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) + if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg)) return false; } else { if (MO.isDead()) { #ifndef NDEBUG - // Baisc check on the register. All of them should be - // 'undef'. + // Basic check on the register. All of them should be 'undef'. 
for (auto &U : MRI->use_nodbg_operands(Reg)) assert(U.isUndef() && "'Undef' use on a 'dead' register is found!"); #endif @@ -108,6 +107,13 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; + + MRI = &MF.getRegInfo(); + + const TargetSubtargetInfo &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + LivePhysRegs.init(*ST.getRegisterInfo()); + bool AnyChanges = eliminateDeadMI(MF); while (AnyChanges && eliminateDeadMI(MF)) ; @@ -116,27 +122,16 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { bool AnyChanges = false; - MRI = &MF.getRegInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - TII = MF.getSubtarget().getInstrInfo(); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will // be cleaned up. for (MachineBasicBlock *MBB : post_order(&MF)) { - // Start out assuming that reserved registers are live out of this block. - LivePhysRegs = MRI->getReservedRegs(); - - // Add live-ins from successors to LivePhysRegs. Normally, physregs are not - // live across blocks, but some targets (x86) can have flags live out of a - // block. - for (const MachineBasicBlock *Succ : MBB->successors()) - for (const auto &LI : Succ->liveins()) - LivePhysRegs.set(LI.PhysReg); + LivePhysRegs.addLiveOuts(*MBB); // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. - for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) { + for (MachineInstr &MI : make_early_inc_range(reverse(*MBB))) { // If the instruction is dead, delete it! if (isDead(&MI)) { LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI); @@ -149,34 +144,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { continue; } - // Record the physreg defs. - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef()) { - Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { - // Check the subreg set, not the alias set, because a def - // of a super-register may still be partially live after - // this def. - for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true); - SR.isValid(); ++SR) - LivePhysRegs.reset(*SR); - } - } else if (MO.isRegMask()) { - // Register mask of preserved registers. All clobbers are dead. - LivePhysRegs.clearBitsNotInMask(MO.getRegMask()); - } - } - // Record the physreg uses, after the defs, in case a physreg is - // both defined and used in the same instruction. 
- for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isUse()) { - Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LivePhysRegs.set(*AI); - } - } - } + LivePhysRegs.stepBackward(MI); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp index 565c8b405f82..bbb89855cfff 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -189,7 +189,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, if (!MO.readsReg()) return; Register MOReg = MO.getReg(); - if (!Register::isVirtualRegister(MOReg)) + if (!MOReg.isVirtual()) return; unsigned MOSubReg = MO.getSubReg(); @@ -213,7 +213,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes) { for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO); addUsedLanesOnOperand(MO, UsedOnMO); @@ -280,7 +280,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, return; const MachineOperand &Def = *MI.defs().begin(); Register DefReg = Def.getReg(); - if (!Register::isVirtualRegister(DefReg)) + if (!DefReg.isVirtual()) return; unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) @@ -376,12 +376,12 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { continue; LaneBitmask MODefinedLanes; - if (Register::isPhysicalRegister(MOReg)) { + if (MOReg.isPhysical()) { MODefinedLanes = LaneBitmask::getAll(); } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { MODefinedLanes = LaneBitmask::getAll(); } else { - assert(Register::isVirtualRegister(MOReg)); + assert(MOReg.isVirtual()); if (MRI->hasOneDef(MOReg)) { const MachineOperand &MODef = *MRI->def_begin(MOReg); const MachineInstr &MODefMI = *MODef.getParent(); @@ -425,7 +425,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { Register DefReg = Def.getReg(); // The used lanes of COPY-like instruction operands are determined by the // following dataflow analysis. - if (Register::isVirtualRegister(DefReg)) { + if (DefReg.isVirtual()) { // But ignore copies across incompatible register classes. 
bool CrossCopy = false; if (lowersToCopies(UseMI)) { @@ -465,7 +465,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, return false; const MachineOperand &Def = MI.getOperand(0); Register DefReg = Def.getReg(); - if (!Register::isVirtualRegister(DefReg)) + if (!DefReg.isVirtual()) return false; unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) @@ -477,7 +477,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, return false; Register MOReg = MO.getReg(); - if (Register::isVirtualRegister(MOReg)) { + if (MOReg.isVirtual()) { const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO); } @@ -488,7 +488,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) { // First pass: Populate defs/uses of vregs with initial values unsigned NumVirtRegs = MRI->getNumVirtRegs(); for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = Register::index2VirtReg(RegIdx); + Register Reg = Register::index2VirtReg(RegIdx); // Determine used/defined lanes and add copy instructions to worklist. VRegInfo &Info = VRegInfos[RegIdx]; @@ -502,7 +502,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) { Worklist.pop_front(); WorklistMembers.reset(RegIdx); VRegInfo &Info = VRegInfos[RegIdx]; - unsigned Reg = Register::index2VirtReg(RegIdx); + Register Reg = Register::index2VirtReg(RegIdx); // Transfer UsedLanes to operands of DefMI (backwards dataflow). MachineOperand &Def = *MRI->def_begin(Reg); @@ -516,7 +516,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) { LLVM_DEBUG({ dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = Register::index2VirtReg(RegIdx); + Register Reg = Register::index2VirtReg(RegIdx); const VRegInfo &Info = VRegInfos[RegIdx]; dbgs() << printReg(Reg, nullptr) << " Used: " << PrintLaneMask(Info.UsedLanes) @@ -534,7 +534,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; unsigned RegIdx = Register::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp index c108f0088d43..00626604d81c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -262,12 +262,12 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { Register Reg = MO.getReg(); // Remember clobbered regunits. - if (MO.isDef() && Register::isPhysicalRegister(Reg)) + if (MO.isDef() && Reg.isPhysical()) for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); ++Units) ClobberedRegUnits.set(*Units); - if (!MO.readsReg() || !Register::isVirtualRegister(Reg)) + if (!MO.readsReg() || !Reg.isVirtual()) continue; MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI || DefMI->getParent() != Head) @@ -321,9 +321,15 @@ bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { return false; } - // Check that instruction is predicable and that it is not already - // predicated. 
- if (!TII->isPredicable(*I) || TII->isPredicated(*I)) { + // Check that instruction is predicable + if (!TII->isPredicable(*I)) { + LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I); + return false; + } + + // Check that instruction is not already predicated. + if (TII->isPredicated(*I) && !TII->canPredicatePredicatedInstr(*I)) { + LLVM_DEBUG(dbgs() << "Is already predicated: " << *I); return false; } @@ -381,7 +387,7 @@ bool SSAIfConv::findInsertionPoint() { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isPhysicalRegister(Reg)) + if (!Reg.isPhysical()) continue; // I clobbers Reg, so it isn't live before I. if (MO.isDef()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp new file mode 100644 index 000000000000..057b5311db70 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp @@ -0,0 +1,139 @@ +//===--- ExpandLargeDivRem.cpp - Expand large div/rem ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass expands div/rem instructions with a bitwidth above a threshold +// into a call to auto-generated functions. +// This is useful for targets like x86_64 that cannot lower divisions +// with more than 128 bits or targets like x86_32 that cannot lower divisions +// with more than 64 bits. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/IntegerDivision.h" + +using namespace llvm; + +static cl::opt<unsigned> + ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, + cl::init(llvm::IntegerType::MAX_INT_BITS), + cl::desc("div and rem instructions on integers with " + "more than <N> bits are expanded.")); + +static bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) { + auto *C = dyn_cast<ConstantInt>(V); + if (!C) + return false; + + APInt Val = C->getValue(); + if (SignedOp && Val.isNegative()) + Val = -Val; + return Val.isPowerOf2(); +} + +static bool isSigned(unsigned int Opcode) { + return Opcode == Instruction::SDiv || Opcode == Instruction::SRem; +} + +static bool runImpl(Function &F, const TargetLowering &TLI) { + SmallVector<BinaryOperator *, 4> Replace; + bool Modified = false; + + unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported(); + if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS) + MaxLegalDivRemBitWidth = ExpandDivRemBits; + + if (MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS) + return false; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: { + // TODO: This doesn't handle vectors. 
+ auto *IntTy = dyn_cast<IntegerType>(I.getType()); + if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth) + continue; + + // The backend has peephole optimizations for powers of two. + if (isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode()))) + continue; + + Replace.push_back(&cast<BinaryOperator>(I)); + Modified = true; + break; + } + default: + break; + } + } + + if (Replace.empty()) + return false; + + while (!Replace.empty()) { + BinaryOperator *I = Replace.pop_back_val(); + + if (I->getOpcode() == Instruction::UDiv || + I->getOpcode() == Instruction::SDiv) { + expandDivision(I); + } else { + expandRemainder(I); + } + } + + return Modified; +} + +namespace { +class ExpandLargeDivRemLegacyPass : public FunctionPass { +public: + static char ID; + + ExpandLargeDivRemLegacyPass() : FunctionPass(ID) { + initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + return runImpl(F, *TLI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} // namespace + +char ExpandLargeDivRemLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expand-large-div-rem", + "Expand large div/rem", false, false) +INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expand-large-div-rem", + "Expand large div/rem", false, false) + +FunctionPass *llvm::createExpandLargeDivRemPass() { + return new ExpandLargeDivRemLegacyPass(); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp new file mode 100644 index 000000000000..ca8056a53139 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp @@ -0,0 +1,664 @@ +//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, +// ‘sitofp .. to’ instructions with a bitwidth above a threshold into +// auto-generated functions. This is useful for targets like x86_64 that cannot +// lower fp convertions with more than 128 bits. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt<unsigned> + ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, + cl::init(llvm::IntegerType::MAX_INT_BITS), + cl::desc("fp convert instructions on integers with " + "more than <N> bits are expanded.")); + +/// Generate code to convert a fp number to integer, replacing FPToS(U)I with +/// the generated code. This currently generates code similarly to compiler-rt's +/// implementations. +/// +/// An example IR generated from compiler-rt/fixsfdi.c looks like below: +/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 { +/// entry: +/// %0 = bitcast float %a to i32 +/// %conv.i = zext i32 %0 to i64 +/// %tobool.not = icmp sgt i32 %0, -1 +/// %conv = select i1 %tobool.not, i64 1, i64 -1 +/// %and = lshr i64 %conv.i, 23 +/// %shr = and i64 %and, 255 +/// %and2 = and i64 %conv.i, 8388607 +/// %or = or i64 %and2, 8388608 +/// %cmp = icmp ult i64 %shr, 127 +/// br i1 %cmp, label %cleanup, label %if.end +/// +/// if.end: ; preds = %entry +/// %sub = add nuw nsw i64 %shr, 4294967169 +/// %conv5 = and i64 %sub, 4294967232 +/// %cmp6.not = icmp eq i64 %conv5, 0 +/// br i1 %cmp6.not, label %if.end12, label %if.then8 +/// +/// if.then8: ; preds = %if.end +/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808 +/// br label %cleanup +/// +/// if.end12: ; preds = %if.end +/// %cmp13 = icmp ult i64 %shr, 150 +/// br i1 %cmp13, label %if.then15, label %if.else +/// +/// if.then15: ; preds = %if.end12 +/// %sub16 = sub nuw nsw i64 150, %shr +/// %shr17 = lshr i64 %or, %sub16 +/// %mul = mul nsw i64 %shr17, %conv +/// br label %cleanup +/// +/// if.else: ; preds = %if.end12 +/// %sub18 = add nsw i64 %shr, -150 +/// %shl = shl i64 %or, %sub18 +/// %mul19 = mul nsw i64 %shl, %conv +/// br label %cleanup +/// +/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8 +/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ] +/// ret i64 %retval.0 +/// } +/// +/// Replace fp to integer with generated code. +static void expandFPToI(Instruction *FPToI) { + IRBuilder<> Builder(FPToI); + auto *FloatVal = FPToI->getOperand(0); + IntegerType *IntTy = cast<IntegerType>(FPToI->getType()); + + unsigned BitWidth = FPToI->getType()->getIntegerBitWidth(); + unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1; + + // FIXME: fp16's range is covered by i32. So `fptoi half` can convert + // to i32 first following a sext/zext to target integer type. 
+ Value *A1 = nullptr; + if (FloatVal->getType()->isHalfTy()) { + if (FPToI->getOpcode() == Instruction::FPToUI) { + Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32)); + A1 = Builder.CreateZExt(A0, IntTy); + } else { // FPToSI + Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32)); + A1 = Builder.CreateSExt(A0, IntTy); + } + FPToI->replaceAllUsesWith(A1); + FPToI->dropAllReferences(); + FPToI->eraseFromParent(); + return; + } + + // fp80 conversion is implemented by fpext to fp128 first then do the + // conversion. + FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; + unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth); + unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; + unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1; + Value *ImplicitBit = Builder.CreateShl( + Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth)); + Value *SignificandMask = + Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1)); + Value *NegOne = Builder.CreateSExt( + ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy); + Value *NegInf = + Builder.CreateShl(ConstantInt::getSigned(IntTy, 1), + ConstantInt::getSigned(IntTy, BitWidth - 1)); + + BasicBlock *Entry = Builder.GetInsertBlock(); + Function *F = Entry->getParent(); + Entry->setName(Twine(Entry->getName(), "fp-to-i-entry")); + BasicBlock *End = + Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup"); + BasicBlock *IfEnd = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End); + BasicBlock *IfThen5 = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End); + BasicBlock *IfEnd9 = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End); + BasicBlock *IfThen12 = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End); + BasicBlock *IfElse = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End); + + Entry->getTerminator()->eraseFromParent(); + + // entry: + Builder.SetInsertPoint(Entry); + Value *FloatVal0 = FloatVal; + // fp80 conversion is implemented by fpext to fp128 first then do the + // conversion. 
+  if (FloatVal->getType()->isX86_FP80Ty())
+    FloatVal0 =
+        Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
+  Value *ARep0 =
+      Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
+  Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
+  Value *PosOrNeg = Builder.CreateICmpSGT(
+      ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
+  Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
+                                     ConstantInt::getSigned(IntTy, -1));
+  Value *And =
+      Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
+  Value *And2 = Builder.CreateAnd(
+      And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
+  Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
+  Value *Or = Builder.CreateOr(Abs, ImplicitBit);
+  Value *Cmp =
+      Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
+  Builder.CreateCondBr(Cmp, End, IfEnd);
+
+  // if.end:
+  Builder.SetInsertPoint(IfEnd);
+  Value *Add1 = Builder.CreateAdd(
+      And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
+  Value *Cmp3 =
+      Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
+  Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+
+  // if.then5:
+  Builder.SetInsertPoint(IfThen5);
+  Value *PosInf = Builder.CreateXor(NegOne, NegInf);
+  Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
+  Builder.CreateBr(End);
+
+  // if.end9:
+  Builder.SetInsertPoint(IfEnd9);
+  Value *Cmp10 = Builder.CreateICmpULT(
+      And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
+  Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+
+  // if.then12:
+  Builder.SetInsertPoint(IfThen12);
+  Value *Sub13 = Builder.CreateSub(
+      Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
+  Value *Shr14 = Builder.CreateLShr(Or, Sub13);
+  Value *Mul = Builder.CreateMul(Shr14, Sign);
+  Builder.CreateBr(End);
+
+  // if.else:
+  Builder.SetInsertPoint(IfElse);
+  Value *Sub15 = Builder.CreateAdd(
+      And2,
+      ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
+  Value *Shl = Builder.CreateShl(Or, Sub15);
+  Value *Mul16 = Builder.CreateMul(Shl, Sign);
+  Builder.CreateBr(End);
+
+  // cleanup:
+  Builder.SetInsertPoint(End, End->begin());
+  PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
+
+  Retval0->addIncoming(Cond8, IfThen5);
+  Retval0->addIncoming(Mul, IfThen12);
+  Retval0->addIncoming(Mul16, IfElse);
+  Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
+
+  FPToI->replaceAllUsesWith(Retval0);
+  FPToI->dropAllReferences();
+  FPToI->eraseFromParent();
+}
+
+/// Generate code to convert an integer to an fp number, replacing S(U)IToFP
+/// with the generated code. This currently generates code similar to
+/// compiler-rt's implementations. This implementation has an implicit
+/// assumption that the integer width is larger than the fp width.
+/// +/// An example IR generated from compiler-rt/floatdisf.c looks like below: +/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 { +/// entry: +/// %cmp = icmp eq i64 %a, 0 +/// br i1 %cmp, label %return, label %if.end +/// +/// if.end: ; preds = %entry +/// %shr = ashr i64 %a, 63 +/// %xor = xor i64 %shr, %a +/// %sub = sub nsw i64 %xor, %shr +/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5 +/// %cast = trunc i64 %0 to i32 +/// %sub1 = sub nuw nsw i32 64, %cast +/// %sub2 = xor i32 %cast, 63 +/// %cmp3 = icmp ult i32 %cast, 40 +/// br i1 %cmp3, label %if.then4, label %if.else +/// +/// if.then4: ; preds = %if.end +/// switch i32 %sub1, label %sw.default [ +/// i32 25, label %sw.bb +/// i32 26, label %sw.epilog +/// ] +/// +/// sw.bb: ; preds = %if.then4 +/// %shl = shl i64 %sub, 1 +/// br label %sw.epilog +/// +/// sw.default: ; preds = %if.then4 +/// %sub5 = sub nsw i64 38, %0 +/// %sh_prom = and i64 %sub5, 4294967295 +/// %shr6 = lshr i64 %sub, %sh_prom +/// %shr9 = lshr i64 274877906943, %0 +/// %and = and i64 %shr9, %sub +/// %cmp10 = icmp ne i64 %and, 0 +/// %conv11 = zext i1 %cmp10 to i64 +/// %or = or i64 %shr6, %conv11 +/// br label %sw.epilog +/// +/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb +/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ] +/// %1 = lshr i64 %a.addr.0, 2 +/// %2 = and i64 %1, 1 +/// %or16 = or i64 %2, %a.addr.0 +/// %inc = add nsw i64 %or16, 1 +/// %3 = and i64 %inc, 67108864 +/// %tobool.not = icmp eq i64 %3, 0 +/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3 +/// %spec.select = ashr i64 %inc, %spec.select.v +/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1 +/// br label %if.end26 +/// +/// if.else: ; preds = %if.end +/// %sub23 = add nuw nsw i64 %0, 4294967256 +/// %sh_prom24 = and i64 %sub23, 4294967295 +/// %shl25 = shl i64 %sub, %sh_prom24 +/// br label %if.end26 +/// +/// if.end26: ; preds = %sw.epilog, %if.else +/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ] +/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ] +/// %conv27 = trunc i64 %shr to i32 +/// %and28 = and i32 %conv27, -2147483648 +/// %add = shl nuw nsw i32 %e.0, 23 +/// %shl29 = add nuw nsw i32 %add, 1065353216 +/// %conv31 = trunc i64 %a.addr.1 to i32 +/// %and32 = and i32 %conv31, 8388607 +/// %or30 = or i32 %and32, %and28 +/// %or33 = or i32 %or30, %shl29 +/// %4 = bitcast i32 %or33 to float +/// br label %return +/// +/// return: ; preds = %entry, %if.end26 +/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ] +/// ret float %retval.0 +/// } +/// +/// Replace integer to fp with generated code. +static void expandIToFP(Instruction *IToFP) { + IRBuilder<> Builder(IToFP); + auto *IntVal = IToFP->getOperand(0); + IntegerType *IntTy = cast<IntegerType>(IntVal->getType()); + + unsigned BitWidth = IntVal->getType()->getIntegerBitWidth(); + unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1; + // fp80 conversion is implemented by conversion tp fp128 first following + // a fptrunc to fp80. + FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; + // FIXME: As there is no related builtins added in compliler-rt, + // here currently utilized the fp32 <-> fp16 lib calls to implement. + FPMantissaWidth = FPMantissaWidth == 10 ? 
23 : FPMantissaWidth; + unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth); + bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP; + + assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() " + "assumes integer width is larger than fp."); + + Value *Temp1 = + Builder.CreateShl(Builder.getIntN(BitWidth, 1), + Builder.getIntN(BitWidth, FPMantissaWidth + 3)); + + BasicBlock *Entry = Builder.GetInsertBlock(); + Function *F = Entry->getParent(); + Entry->setName(Twine(Entry->getName(), "itofp-entry")); + BasicBlock *End = + Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return"); + BasicBlock *IfEnd = + BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End); + BasicBlock *IfThen4 = + BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End); + BasicBlock *SwBB = + BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End); + BasicBlock *SwDefault = + BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End); + BasicBlock *SwEpilog = + BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End); + BasicBlock *IfThen20 = + BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End); + BasicBlock *IfElse = + BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End); + BasicBlock *IfEnd26 = + BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End); + + Entry->getTerminator()->eraseFromParent(); + + Function *CTLZ = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy); + ConstantInt *True = Builder.getTrue(); + + // entry: + Builder.SetInsertPoint(Entry); + Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0)); + Builder.CreateCondBr(Cmp, End, IfEnd); + + // if.end: + Builder.SetInsertPoint(IfEnd); + Value *Shr = + Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1)); + Value *Xor = Builder.CreateXor(Shr, IntVal); + Value *Sub = Builder.CreateSub(Xor, Shr); + Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True}); + Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty()); + int BitWidthNew = FloatWidth == 128 ? BitWidth : 32; + Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth), + FloatWidth == 128 ? Call : Cast); + Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1), + FloatWidth == 128 ? Call : Cast); + Value *Cmp3 = Builder.CreateICmpSGT( + Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1)); + Builder.CreateCondBr(Cmp3, IfThen4, IfElse); + + // if.then4: + Builder.SetInsertPoint(IfThen4); + llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault); + SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB); + SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog); + + // sw.bb: + Builder.SetInsertPoint(SwBB); + Value *Shl = + Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1)); + Builder.CreateBr(SwEpilog); + + // sw.default: + Builder.SetInsertPoint(SwDefault); + Value *Sub5 = Builder.CreateSub( + Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3), + FloatWidth == 128 ? Call : Cast); + Value *ShProm = Builder.CreateZExt(Sub5, IntTy); + Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal, + FloatWidth == 128 ? Sub5 : ShProm); + Value *Sub8 = + Builder.CreateAdd(FloatWidth == 128 ? Call : Cast, + Builder.getIntN(BitWidthNew, FPMantissaWidth + 3)); + Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy); + Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1), + FloatWidth == 128 ? 
Sub8 : ShProm9); + Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal); + Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0)); + Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy); + Value *Or = Builder.CreateOr(Shr6, Conv11); + Builder.CreateBr(SwEpilog); + + // sw.epilog: + Builder.SetInsertPoint(SwEpilog); + PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3); + AAddr0->addIncoming(Or, SwDefault); + AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4); + AAddr0->addIncoming(Shl, SwBB); + Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty()); + Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2)); + Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1)); + Value *Conv16 = Builder.CreateZExt(A2, IntTy); + Value *Or17 = Builder.CreateOr(AAddr0, Conv16); + Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1)); + Value *Shr18 = nullptr; + if (IsSigned) + Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2)); + else + Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2)); + Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3"); + Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0)); + Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth)); + Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32)); + Value *ExtractT64 = nullptr; + if (FloatWidth > 80) + ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty()); + else + ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty()); + Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20); + + // if.then20 + Builder.SetInsertPoint(IfThen20); + Value *Shr21 = nullptr; + if (IsSigned) + Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3)); + else + Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3)); + Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth)); + Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32)); + Value *ExtractT62 = nullptr; + if (FloatWidth > 80) + ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64)); + else + ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32)); + Builder.CreateBr(IfEnd26); + + // if.else: + Builder.SetInsertPoint(IfElse); + Value *Sub24 = Builder.CreateAdd( + FloatWidth == 128 ? Call : Cast, + ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew), + -(BitWidth - FPMantissaWidth - 1))); + Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy); + Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal, + FloatWidth == 128 ? Sub24 : ShProm25); + Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth)); + Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32)); + Value *ExtractT66 = nullptr; + if (FloatWidth > 80) + ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64)); + else + ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty()); + Builder.CreateBr(IfEnd26); + + // if.end26: + Builder.SetInsertPoint(IfEnd26); + PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3); + AAddr1Off0->addIncoming(ExtractT, IfThen20); + AAddr1Off0->addIncoming(ExtractT60, SwEpilog); + AAddr1Off0->addIncoming(ExtractT61, IfElse); + PHINode *AAddr1Off32 = nullptr; + if (FloatWidth > 32) { + AAddr1Off32 = + Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 
64 : 32), 3); + AAddr1Off32->addIncoming(ExtractT62, IfThen20); + AAddr1Off32->addIncoming(ExtractT64, SwEpilog); + AAddr1Off32->addIncoming(ExtractT66, IfElse); + } + PHINode *E0 = nullptr; + if (FloatWidth <= 80) { + E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3); + E0->addIncoming(Sub1, IfThen20); + E0->addIncoming(Sub2, SwEpilog); + E0->addIncoming(Sub2, IfElse); + } + Value *And29 = nullptr; + if (FloatWidth > 80) { + Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1), + Builder.getIntN(BitWidth, 63)); + And29 = Builder.CreateAnd(Shr, Temp2, "and29"); + } else { + Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32)); + And29 = Builder.CreateAnd( + Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000)); + } + unsigned TempMod = FPMantissaWidth % 32; + Value *And34 = nullptr; + Value *Shl30 = nullptr; + if (FloatWidth > 80) { + TempMod += 32; + Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod)); + Shl30 = Builder.CreateAdd( + Add, + Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod)); + And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128)); + } else { + Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod)); + Shl30 = Builder.CreateAdd( + Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod)); + And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0, + Builder.getIntN(32, (1 << TempMod) - 1)); + } + Value *Or35 = nullptr; + if (FloatWidth > 80) { + Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128)); + Value *Or31 = Builder.CreateOr(And29Trunc, And34); + Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64)); + Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1), + Builder.getIntN(128, FPMantissaWidth)); + Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1)); + Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4); + Or35 = Builder.CreateOr(Or34, A6); + } else { + Value *Or31 = Builder.CreateOr(And34, And29); + Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30); + } + Value *A4 = nullptr; + if (IToFP->getType()->isDoubleTy()) { + Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth)); + Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32)); + Value *And1 = + Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF)); + Value *Or1 = Builder.CreateOr(Shl1, And1); + A4 = Builder.CreateBitCast(Or1, IToFP->getType()); + } else if (IToFP->getType()->isX86_FP80Ty()) { + Value *A40 = + Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext())); + A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); + } else if (IToFP->getType()->isHalfTy()) { + // Deal with "half" situation. This is a workaround since we don't have + // floattihf.c currently as referring. 
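For orientation, the sign/exponent/mantissa packing assembled by the blocks above for the 32-bit float case can be sketched as plain scalar C++. This is an illustrative aside modeled on compiler-rt's floatdisf.c and is not part of the patch; the helper name is hypothetical and the round-to-nearest-even adjustment done in the itofp-sw-epilog block is deliberately omitted.

#include <cstdint>
#include <cstring>

// Simplified scalar reference of the i64 -> float lowering produced by
// expandIToFP(); truncates instead of rounding to nearest even.
static float floatdisfSketch(int64_t A) {
  if (A == 0)
    return 0.0f;
  uint32_t SignBit = A < 0 ? 0x80000000u : 0u;
  uint64_t Abs = A < 0 ? 0 - (uint64_t)A : (uint64_t)A;
  int SD = 64 - __builtin_clzll(Abs);        // significant bits (ctlz in the IR)
  int E = SD - 1;                            // unbiased exponent
  uint64_t Mant = SD > 24 ? Abs >> (SD - 24) // place the implicit one at bit 23
                          : Abs << (24 - SD);
  uint32_t Rep =
      SignBit | (uint32_t)(E + 127) << 23 | ((uint32_t)Mant & 0x7FFFFF);
  float F;
  std::memcpy(&F, &Rep, sizeof(F));
  return F;
}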
+ Value *A40 = + Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext())); + A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); + } else // float type + A4 = Builder.CreateBitCast(Or35, IToFP->getType()); + Builder.CreateBr(End); + + // return: + Builder.SetInsertPoint(End, End->begin()); + PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2); + Retval0->addIncoming(A4, IfEnd26); + Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry); + + IToFP->replaceAllUsesWith(Retval0); + IToFP->dropAllReferences(); + IToFP->eraseFromParent(); +} + +static bool runImpl(Function &F, const TargetLowering &TLI) { + SmallVector<Instruction *, 4> Replace; + bool Modified = false; + + unsigned MaxLegalFpConvertBitWidth = + TLI.getMaxLargeFPConvertBitWidthSupported(); + if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS) + MaxLegalFpConvertBitWidth = ExpandFpConvertBits; + + if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) + return false; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::FPToUI: + case Instruction::FPToSI: { + // TODO: This pass doesn't handle vectors. + if (I.getOperand(0)->getType()->isVectorTy()) + continue; + + auto *IntTy = dyn_cast<IntegerType>(I.getType()); + if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) + continue; + + Replace.push_back(&I); + Modified = true; + break; + } + case Instruction::UIToFP: + case Instruction::SIToFP: { + // TODO: This pass doesn't handle vectors. + if (I.getOperand(0)->getType()->isVectorTy()) + continue; + + auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType()); + if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) + continue; + + Replace.push_back(&I); + Modified = true; + break; + } + default: + break; + } + } + + if (Replace.empty()) + return false; + + while (!Replace.empty()) { + Instruction *I = Replace.pop_back_val(); + if (I->getOpcode() == Instruction::FPToUI || + I->getOpcode() == Instruction::FPToSI) { + expandFPToI(I); + } else { + expandIToFP(I); + } + } + + return Modified; +} + +namespace { +class ExpandLargeFpConvertLegacyPass : public FunctionPass { +public: + static char ID; + + ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) { + initializeExpandLargeFpConvertLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + return runImpl(F, *TLI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} // namespace + +char ExpandLargeFpConvertLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert", + "Expand large fp convert", false, false) +INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert", + "Expand large fp convert", false, false) + +FunctionPass *llvm::createExpandLargeFpConvertPass() { + return new ExpandLargeFpConvertLegacyPass(); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp index b2639636dda7..3838eaadd1d2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -28,6 +28,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include 
"llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" +#include <optional> using namespace llvm; @@ -877,15 +878,14 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering *TL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, DominatorTree *DT) { - Optional<DomTreeUpdater> DTU; + std::optional<DomTreeUpdater> DTU; if (DT) DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); const DataLayout& DL = F.getParent()->getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { - if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, - DTU ? DTU.getPointer() : nullptr)) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) { MadeChanges = true; // If changes were made, restart the function from the beginning, since // the structure of the function was changed. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 086b4a4dcc47..cc63984158c8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -93,9 +93,9 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { assert(SubIdx != 0 && "Invalid index for insert_subreg"); Register DstSubReg = TRI->getSubReg(DstReg, SubIdx); - assert(Register::isPhysicalRegister(DstReg) && + assert(DstReg.isPhysical() && "Insert destination must be in a physical register"); - assert(Register::isPhysicalRegister(InsReg) && + assert(InsReg.isPhysical() && "Inserted value must be in a physical register"); LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp index db4d42bf3ca4..5ee76ff567fb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include <optional> using namespace llvm; @@ -122,7 +123,7 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) { if (isa<VPReductionIntrinsic>(VPI)) return false; // Fallback to whether the intrinsic is speculatable. - Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode(); + std::optional<unsigned> OpcOpt = VPI.getFunctionalOpcode(); unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call); return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI); } @@ -166,25 +167,27 @@ struct CachingVPExpander { /// length of the operation. void discardEVLParameter(VPIntrinsic &PI); - /// \brief Lower this VP binary operator to a unpredicated binary operator. + /// Lower this VP binary operator to a unpredicated binary operator. Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, VPIntrinsic &PI); - /// \brief Lower this VP reduction to a call to an unpredicated reduction - /// intrinsic. + /// Lower this VP reduction to a call to an unpredicated reduction intrinsic. Value *expandPredicationInReduction(IRBuilder<> &Builder, VPReductionIntrinsic &PI); - /// \brief Lower this VP memory operation to a non-VP intrinsic. + /// Lower this VP memory operation to a non-VP intrinsic. Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, VPIntrinsic &VPI); - /// \brief Query TTI and expand the vector predication in \p P accordingly. 
+ /// Lower this VP comparison to a call to an unpredicated comparison. + Value *expandPredicationInComparison(IRBuilder<> &Builder, + VPCmpIntrinsic &PI); + + /// Query TTI and expand the vector predication in \p P accordingly. Value *expandPredication(VPIntrinsic &PI); - /// \brief Determine how and whether the VPIntrinsic \p VPI shall be - /// expanded. This overrides TTI with the cl::opts listed at the top of this - /// file. + /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This + /// overrides TTI with the cl::opts listed at the top of this file. VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; bool UsingTTIOverrides; @@ -293,7 +296,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, APInt::getSignedMinValue(EltBits)); case Intrinsic::vp_reduce_fmax: Negative = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Intrinsic::vp_reduce_fmin: { FastMathFlags Flags = VPI.getFastMathFlags(); const fltSemantics &Semantics = EltTy->getFltSemantics(); @@ -420,7 +423,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, StoreInst *NewStore = Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false); if (AlignOpt.has_value()) - NewStore->setAlignment(AlignOpt.value()); + NewStore->setAlignment(*AlignOpt); NewMemoryInst = NewStore; } else NewMemoryInst = Builder.CreateMaskedStore( @@ -432,7 +435,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, LoadInst *NewLoad = Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false); if (AlignOpt.has_value()) - NewLoad->setAlignment(AlignOpt.value()); + NewLoad->setAlignment(*AlignOpt); NewMemoryInst = NewLoad; } else NewMemoryInst = Builder.CreateMaskedLoad( @@ -462,6 +465,24 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, return NewMemoryInst; } +Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder, + VPCmpIntrinsic &VPI) { + assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && + "Implicitly dropping %evl in non-speculatable operator!"); + + assert(*VPI.getFunctionalOpcode() == Instruction::ICmp || + *VPI.getFunctionalOpcode() == Instruction::FCmp); + + Value *Op0 = VPI.getOperand(0); + Value *Op1 = VPI.getOperand(1); + auto Pred = VPI.getPredicate(); + + auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1); + + replaceOperation(*NewCmp, VPI); + return NewCmp; +} + void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); @@ -538,6 +559,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) return expandPredicationInReduction(Builder, *VPRI); + if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI)) + return expandPredicationInComparison(Builder, *VPCmp); + switch (VPI.getIntrinsicID()) { default: break; @@ -598,7 +622,7 @@ CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { return VPStrat; } -/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI. +/// Expand llvm.vp.* intrinsics as requested by \p TTI. 
bool CachingVPExpander::expandVectorPredication() { SmallVector<TransformJob, 16> Worklist; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 252910fd9462..55d939de426e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -422,7 +422,7 @@ public: LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore); TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI, - RC, &TRI); + RC, &TRI, Register()); } } @@ -431,7 +431,7 @@ public: const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); int FI = RegToSlotIdx[Reg]; if (It != MBB->end()) { - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); return; } @@ -439,7 +439,7 @@ public: // and then swap them. assert(!MBB->empty() && "Empty block"); --It; - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); MachineInstr *Reload = It->getPrevNode(); int Dummy = 0; (void)Dummy; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index 6a0d1c33d3e3..356d208fc881 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -61,6 +61,10 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_TRUNC: case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_SELECT: + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_BUILD_VECTOR_TRUNC: + case TargetOpcode::G_SEXT_INREG: return true; } return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index a432e4ed7fb7..64e2d517e3b9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -107,7 +107,7 @@ void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B, void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps, - Optional<unsigned> Flags, + std::optional<unsigned> Flags, GISelInstProfileBuilder &B) const { profileMBBOpcode(B, Opc); @@ -170,7 +170,7 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps, MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps, - Optional<unsigned> Flag) { + std::optional<unsigned> Flag) { switch (Opc) { default: break; @@ -210,8 +210,8 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, break; } - if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(), - SrcOps[1].getReg(), *getMRI())) + if (std::optional<APInt> Cst = ConstantFoldBinOp( + Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI())) return buildConstant(DstOps[0], *Cst); break; } @@ -230,7 +230,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, // Try to constant fold these. 
assert(SrcOps.size() == 2 && "Invalid sources"); assert(DstOps.size() == 1 && "Invalid dsts"); - if (Optional<APFloat> Cst = ConstantFoldFPBinOp( + if (std::optional<APFloat> Cst = ConstantFoldFPBinOp( Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI())) return buildFConstant(DstOps[0], *Cst); break; @@ -251,7 +251,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, // Try to constant fold these. assert(SrcOps.size() == 1 && "Invalid sources"); assert(DstOps.size() == 1 && "Invalid dsts"); - if (Optional<APFloat> Cst = ConstantFoldIntToFloat( + if (std::optional<APFloat> Cst = ConstantFoldIntToFloat( Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI())) return buildFConstant(DstOps[0], *Cst); break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 6c36c6445c65..89872259cfca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -70,6 +70,15 @@ ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, return Flags; } +ISD::ArgFlagsTy +CallLowering::getAttributesForReturn(const CallBase &Call) const { + ISD::ArgFlagsTy Flags; + addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) { + return Call.hasRetAttr(Attr); + }); + return Flags; +} + void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, const AttributeList &Attrs, unsigned OpIdx) const { @@ -141,7 +150,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, Register ReturnHintAlignReg; Align ReturnHintAlign; - Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}}; + Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)}; if (!Info.OrigRet.Ty->isVoidTy()) { setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); @@ -155,6 +164,12 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, } } + auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi); + if (Bundle && CB.isIndirectCall()) { + Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]); + assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type"); + } + Info.CB = &CB; Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); Info.CallConv = CallConv; @@ -291,8 +306,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, Register UnmergeSrcReg; if (LCMTy != PartLLT) { assert(DstRegs.size() == 1); - return B.buildDeleteTrailingVectorElements(DstRegs[0], - B.buildMerge(LCMTy, SrcRegs)); + return B.buildDeleteTrailingVectorElements( + DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs)); } else { // We don't need to widen anything if we're extracting a scalar which was // promoted to a vector e.g. 
s8 -> v4s8 -> s8 @@ -371,11 +386,11 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs, assert(OrigRegs.size() == 1); LLT OrigTy = MRI.getType(OrigRegs[0]); - unsigned SrcSize = PartLLT.getSizeInBits().getFixedSize() * Regs.size(); + unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size(); if (SrcSize == OrigTy.getSizeInBits()) - B.buildMerge(OrigRegs[0], Regs); + B.buildMergeValues(OrigRegs[0], Regs); else { - auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs); + auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs); B.buildTrunc(OrigRegs[0], Widened); } @@ -443,7 +458,8 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs, assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0); for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { - auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt)); + auto Merge = + B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt)); // Fix the type in case this is really a vector of pointers. MRI.setType(Merge.getReg(0), RealDstEltTy); EltMerges.push_back(Merge.getReg(0)); @@ -489,6 +505,15 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, return; } + if (SrcTy.isVector() && PartTy.isVector() && + PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() && + SrcTy.getNumElements() < PartTy.getNumElements()) { + // A coercion like: v2f32 -> v4f32. + Register DstReg = DstRegs.front(); + B.buildPadVectorWithUndefElements(DstReg, SrcReg); + return; + } + LLT GCDTy = getGCDType(SrcTy, PartTy); if (GCDTy == PartTy) { // If this already evenly divisible, we can create a simple unmerge. @@ -525,7 +550,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, SmallVector<Register, 8> MergeParts(1, SrcReg); for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize) MergeParts.push_back(Undef); - UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0); + UnmergeSrc = B.buildMergeLikeInstr(LCMTy, MergeParts).getReg(0); } } @@ -656,7 +681,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, if (VA.needsCustom()) { std::function<void()> Thunk; unsigned NumArgRegs = Handler.assignCustomValue( - Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk); + Args[i], ArrayRef(ArgLocs).slice(j), &Thunk); if (Thunk) DelayedOutgoingRegAssignments.emplace_back(Thunk); if (!NumArgRegs) @@ -1196,7 +1221,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) { DstTy = DstTy.getScalarType(); return (SrcTy.isPointer() && DstTy.isScalar()) || - (DstTy.isScalar() && SrcTy.isPointer()); + (DstTy.isPointer() && SrcTy.isScalar()); } void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 1a5fe3e84c17..748fa273d499 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" @@ -52,7 +53,9 @@ class WorkListMaintainer : public GISelChangeObserver { WorkListTy &WorkList; /// The instructions that have been created but we want to report once they /// have their 
operands. This is only maintained if debug output is requested. - SmallPtrSet<const MachineInstr *, 4> CreatedInstrs; +#ifndef NDEBUG + SetVector<const MachineInstr *> CreatedInstrs; +#endif public: WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {} @@ -132,6 +135,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, // Erase dead insts before even adding to the list. if (isTriviallyDead(CurMI, *MRI)) { LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); + llvm::salvageDebugInfo(*MRI, CurMI); CurMI.eraseFromParent(); continue; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 05a25bc3078e..af4bb1634746 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -27,10 +27,13 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DivisionByConstantInfo.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include <cmath> +#include <optional> #include <tuple> #define DEBUG_TYPE "gi-combiner" @@ -46,11 +49,12 @@ static cl::opt<bool> "legal for the GlobalISel combiner")); CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, - MachineIRBuilder &B, GISelKnownBits *KB, - MachineDominatorTree *MDT, + MachineIRBuilder &B, bool IsPreLegalize, + GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI) : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB), - MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()), + MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI), + RBI(Builder.getMF().getSubtarget().getRegBankInfo()), TRI(Builder.getMF().getSubtarget().getRegisterInfo()) { (void)this->KB; } @@ -93,8 +97,8 @@ static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { /// \param MemOffset2Idx maps memory offsets to address offsets. /// \param LowestIdx is the lowest index in \p MemOffset2Idx. /// -/// \returns true if the map corresponds to a big endian byte pattern, false -/// if it corresponds to a little endian byte pattern, and None otherwise. +/// \returns true if the map corresponds to a big endian byte pattern, false if +/// it corresponds to a little endian byte pattern, and std::nullopt otherwise. /// /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns /// are as follows: @@ -104,24 +108,24 @@ static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { /// 1 1 2 /// 2 2 1 /// 3 3 0 -static Optional<bool> +static std::optional<bool> isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, int64_t LowestIdx) { // Need at least two byte positions to decide on endianness. 
unsigned Width = MemOffset2Idx.size(); if (Width < 2) - return None; + return std::nullopt; bool BigEndian = true, LittleEndian = true; for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); if (MemOffsetAndIdx == MemOffset2Idx.end()) - return None; + return std::nullopt; const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; assert(Idx >= 0 && "Expected non-negative byte offset?"); LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); if (!BigEndian && !LittleEndian) - return None; + return std::nullopt; } assert((BigEndian != LittleEndian) && @@ -129,7 +133,7 @@ isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, return BigEndian; } -bool CombinerHelper::isPreLegalize() const { return !LI; } +bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; } bool CombinerHelper::isLegal(const LegalityQuery &Query) const { assert(LI && "Must have LegalizerInfo to query isLegal!"); @@ -385,7 +389,7 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, if (Ops.size() == 1) Builder.buildCopy(NewDstReg, Ops[0]); else - Builder.buildMerge(NewDstReg, Ops); + Builder.buildMergeLikeInstr(NewDstReg, Ops); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, NewDstReg); @@ -485,6 +489,24 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { return false; } +static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) { + unsigned CandidateLoadOpc; + switch (ExtOpc) { + case TargetOpcode::G_ANYEXT: + CandidateLoadOpc = TargetOpcode::G_LOAD; + break; + case TargetOpcode::G_SEXT: + CandidateLoadOpc = TargetOpcode::G_SEXTLOAD; + break; + case TargetOpcode::G_ZEXT: + CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD; + break; + default: + llvm_unreachable("Unexpected extend opc"); + } + return CandidateLoadOpc; +} + bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &Preferred) { // We match the loads and follow the uses to the extend instead of matching @@ -535,11 +557,12 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT) continue; // Check for legality. - if (LI) { + if (!isPreLegalize()) { LegalityQuery::MemDesc MMDesc(MMO); + unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode()); LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); LLT SrcTy = MRI.getType(LoadMI->getPointerReg()); - if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}}) + if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}}) .Action != LegalizeActions::Legal) continue; } @@ -587,12 +610,8 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, }; Observer.changingInstr(MI); - MI.setDesc( - Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT - ? TargetOpcode::G_SEXTLOAD - : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT - ? TargetOpcode::G_ZEXTLOAD - : TargetOpcode::G_LOAD)); + unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode); + MI.setDesc(Builder.getTII().get(LoadOpc)); // Rewrite all the uses to fix up the types. 
auto &LoadValue = MI.getOperand(0); @@ -1266,12 +1285,12 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { LegalizerHelper::LegalizeResult::Legalized; } -static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, - const Register Op, - const MachineRegisterInfo &MRI) { +static std::optional<APFloat> +constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op, + const MachineRegisterInfo &MRI) { const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); if (!MaybeCst) - return None; + return std::nullopt; APFloat V = MaybeCst->getValueAPF(); switch (Opcode) { @@ -1308,8 +1327,8 @@ static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, return V; } -bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst) { +bool CombinerHelper::matchCombineConstantFoldFpUnary( + MachineInstr &MI, std::optional<APFloat> &Cst) { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); @@ -1317,8 +1336,8 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, return Cst.has_value(); } -void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst) { +void CombinerHelper::applyCombineConstantFoldFpUnary( + MachineInstr &MI, std::optional<APFloat> &Cst) { assert(Cst && "Optional is unexpectedly empty!"); Builder.setInstrAndDebugLoc(MI); MachineFunction &MF = Builder.getMF(); @@ -1580,6 +1599,13 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, Register Shift1 = Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); + // If LogicNonShiftReg is the same to Shift1Base, and shift1 const is the same + // to MatchInfo.Shift2 const, CSEMIRBuilder will reuse the old shift1 when + // build shift2. So, if we erase MatchInfo.Shift2 at the end, actually we + // remove old shift1. And it will cause crash later. So erase it earlier to + // avoid the crash. + MatchInfo.Shift2->eraseFromParent(); + Register Shift2Const = MI.getOperand(2).getReg(); Register Shift2 = Builder .buildInstr(Opcode, {DestType}, @@ -1589,8 +1615,7 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, Register Dest = MI.getOperand(0).getReg(); Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); - // These were one use so it's safe to remove them. - MatchInfo.Shift2->eraseFromParent(); + // This was one use so it's safe to remove it. 
MatchInfo.Logic->eraseFromParent(); MI.eraseFromParent(); @@ -1706,7 +1731,7 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( auto &Unmerge = cast<GUnmerge>(MI); Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI); - auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI); + auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI); if (!SrcInstr) return false; @@ -1947,7 +1972,7 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, } auto Zero = Builder.buildConstant(HalfTy, 0); - Builder.buildMerge(DstReg, { Narrowed, Zero }); + Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero}); } else if (MI.getOpcode() == TargetOpcode::G_SHL) { Register Narrowed = Unmerge.getReg(0); // dst = G_SHL s64:x, C for C >= 32 @@ -1960,7 +1985,7 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, } auto Zero = Builder.buildConstant(HalfTy, 0); - Builder.buildMerge(DstReg, { Zero, Narrowed }); + Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed}); } else { assert(MI.getOpcode() == TargetOpcode::G_ASHR); auto Hi = Builder.buildAShr( @@ -1970,13 +1995,13 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, if (ShiftVal == HalfSize) { // (G_ASHR i64:x, 32) -> // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31) - Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi }); + Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi}); } else if (ShiftVal == Size - 1) { // Don't need a second shift. // (G_ASHR i64:x, 63) -> // %narrowed = (G_ASHR hi_32(x), 31) // G_MERGE_VALUES %narrowed, %narrowed - Builder.buildMerge(DstReg, { Hi, Hi }); + Builder.buildMergeLikeInstr(DstReg, {Hi, Hi}); } else { auto Lo = Builder.buildAShr( HalfTy, Unmerge.getReg(1), @@ -1984,7 +2009,7 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, // (G_ASHR i64:x, C) ->, for C >= 32 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31) - Builder.buildMerge(DstReg, { Lo, Hi }); + Builder.buildMergeLikeInstr(DstReg, {Lo, Hi}); } } @@ -2019,12 +2044,6 @@ void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { MI.eraseFromParent(); } -bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); -} - void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); Register DstReg = MI.getOperand(0).getReg(); @@ -2195,19 +2214,6 @@ void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { MI.eraseFromParent(); } -bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); -} - -bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { - assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); - Src = MI.getOperand(1).getReg(); - Register AbsSrc; - return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); -} - bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI, BuildFnTy &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); @@ -2260,44 +2266,109 @@ void CombinerHelper::applyCombineTruncOfExt( MI.eraseFromParent(); } -bool CombinerHelper::matchCombineTruncOfShl( - 
MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - Register ShiftSrc; - Register ShiftAmt; - - if (MRI.hasOneNonDBGUse(SrcReg) && - mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && - isLegalOrBeforeLegalizer( - {TargetOpcode::G_SHL, - {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { - KnownBits Known = KB->getKnownBits(ShiftAmt); - unsigned Size = DstTy.getSizeInBits(); - if (Known.countMaxActiveBits() <= Log2_32(Size)) { - MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); - return true; - } - } - return false; +static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) { + const unsigned ShiftSize = ShiftTy.getScalarSizeInBits(); + const unsigned TruncSize = TruncTy.getScalarSizeInBits(); + + // ShiftTy > 32 > TruncTy -> 32 + if (ShiftSize > 32 && TruncSize < 32) + return ShiftTy.changeElementSize(32); + + // TODO: We could also reduce to 16 bits, but that's more target-dependent. + // Some targets like it, some don't, some only like it under certain + // conditions/processor versions, etc. + // A TL hook might be needed for this. + + // Don't combine + return ShiftTy; } -void CombinerHelper::applyCombineTruncOfShl( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { +bool CombinerHelper::matchCombineTruncOfShift( + MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); + + if (!MRI.hasOneNonDBGUse(SrcReg)) + return false; + + LLT SrcTy = MRI.getType(SrcReg); LLT DstTy = MRI.getType(DstReg); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - Register ShiftSrc = MatchInfo.first; - Register ShiftAmt = MatchInfo.second; + MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI); + const auto &TL = getTargetLowering(); + + LLT NewShiftTy; + switch (SrcMI->getOpcode()) { + default: + return false; + case TargetOpcode::G_SHL: { + NewShiftTy = DstTy; + + // Make sure new shift amount is legal. + KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg()); + if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits())) + return false; + break; + } + case TargetOpcode::G_LSHR: + case TargetOpcode::G_ASHR: { + // For right shifts, we conservatively do not do the transform if the TRUNC + // has any STORE users. The reason is that if we change the type of the + // shift, we may break the truncstore combine. + // + // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)). + for (auto &User : MRI.use_instructions(DstReg)) + if (User.getOpcode() == TargetOpcode::G_STORE) + return false; + + NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy); + if (NewShiftTy == SrcTy) + return false; + + // Make sure we won't lose information by truncating the high bits. 
+ KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg()); + if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() - + DstTy.getScalarSizeInBits())) + return false; + break; + } + } + + if (!isLegalOrBeforeLegalizer( + {SrcMI->getOpcode(), + {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}})) + return false; + + MatchInfo = std::make_pair(SrcMI, NewShiftTy); + return true; +} + +void CombinerHelper::applyCombineTruncOfShift( + MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) { Builder.setInstrAndDebugLoc(MI); - auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); - Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); - MI.eraseFromParent(); + + MachineInstr *ShiftMI = MatchInfo.first; + LLT NewShiftTy = MatchInfo.second; + + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + + Register ShiftAmt = ShiftMI->getOperand(2).getReg(); + Register ShiftSrc = ShiftMI->getOperand(1).getReg(); + ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0); + + Register NewShift = + Builder + .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt}) + .getReg(0); + + if (NewShiftTy == DstTy) + replaceRegWith(MRI, Dst, NewShift); + else + Builder.buildTrunc(Dst, NewShift); + + eraseInst(MI); } bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { @@ -2332,6 +2403,19 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { MRI); } +bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT || + MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) && + "Expected an insert/extract element op"); + LLT VecTy = MRI.getType(MI.getOperand(1).getReg()); + unsigned IdxIdx = + MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3; + auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI); + if (!Idx) + return false; + return Idx->getZExtValue() >= VecTy.getNumElements(); +} + bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { GSelect &SelMI = cast<GSelect>(MI); auto Cst = @@ -2579,7 +2663,7 @@ bool CombinerHelper::matchCombineInsertVecElts( while (mi_match( CurrInst->getOperand(0).getReg(), MRI, m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { - if (IntImm >= NumElts) + if (IntImm >= NumElts || IntImm < 0) return false; if (!MatchInfo[IntImm]) MatchInfo[IntImm] = TmpReg; @@ -2738,9 +2822,9 @@ bool CombinerHelper::matchAshrShlToSextInreg( assert(MI.getOpcode() == TargetOpcode::G_ASHR); int64_t ShlCst, AshrCst; Register Src; - // FIXME: detect splat constant vectors. if (!mi_match(MI.getOperand(0).getReg(), MRI, - m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) + m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)), + m_ICstOrSplat(AshrCst)))) return false; if (ShlCst != AshrCst) return false; @@ -2812,12 +2896,6 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, return false; Register AndDst = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(AndDst); - - // FIXME: This should be removed once GISelKnownBits supports vectors. 
- if (DstTy.isVector()) - return false; - Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); KnownBits LHSBits = KB->getKnownBits(LHS); @@ -2858,12 +2936,6 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { return false; Register OrDst = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(OrDst); - - // FIXME: This should be removed once GISelKnownBits supports vectors. - if (DstTy.isVector()) - return false; - Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); KnownBits LHSBits = KB->getKnownBits(LHS); @@ -3190,14 +3262,12 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI, } Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags()); - Observer.erasingInstr(*Select); - Select->eraseFromParent(); MI.eraseFromParent(); return true; } -Optional<SmallVector<Register, 8>> +std::optional<SmallVector<Register, 8>> CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); // We want to detect if Root is part of a tree which represents a bunch @@ -3239,7 +3309,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { // In the combine, we want to elimate the entire tree. if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) - return None; + return std::nullopt; // If it's a G_OR, save it and continue to walk. If it's not, then it's // something that may be a load + arithmetic. @@ -3256,7 +3326,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { // We're going to try and merge each register into a wider power-of-2 type, // so we ought to have an even number of registers. if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) - return None; + return std::nullopt; return RegsToVisit; } @@ -3268,7 +3338,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { /// e.g. x[i] << 24 /// /// \returns The load instruction and the byte offset it is moved into. -static Optional<std::pair<GZExtLoad *, int64_t>> +static std::optional<std::pair<GZExtLoad *, int64_t>> matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI) { assert(MRI.hasOneNonDBGUse(Reg) && @@ -3282,20 +3352,20 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, } if (Shift % MemSizeInBits != 0) - return None; + return std::nullopt; // TODO: Handle other types of loads. auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI); if (!Load) - return None; + return std::nullopt; if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits) - return None; + return std::nullopt; return std::make_pair(Load, Shift / MemSizeInBits); } -Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> +std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> CombinerHelper::findLoadOffsetsForLoadOrCombine( SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { @@ -3335,7 +3405,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( // shifted) value. 
auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); if (!LoadAndPos) - return None; + return std::nullopt; GZExtLoad *Load; int64_t DstPos; std::tie(Load, DstPos) = *LoadAndPos; @@ -3346,14 +3416,14 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( if (!MBB) MBB = LoadMBB; if (LoadMBB != MBB) - return None; + return std::nullopt; // Make sure that the MachineMemOperands of every seen load are compatible. auto &LoadMMO = Load->getMMO(); if (!MMO) MMO = &LoadMMO; if (MMO->getAddrSpace() != LoadMMO.getAddrSpace()) - return None; + return std::nullopt; // Find out what the base pointer and index for the load is. Register LoadPtr; @@ -3366,7 +3436,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( // Don't combine things like a[i], a[i] -> a bigger load. if (!SeenIdx.insert(Idx).second) - return None; + return std::nullopt; // Every load must share the same base pointer; don't combine things like: // @@ -3374,7 +3444,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( if (!BasePtr.isValid()) BasePtr = LoadPtr; if (BasePtr != LoadPtr) - return None; + return std::nullopt; if (Idx < LowestIdx) { LowestIdx = Idx; @@ -3386,7 +3456,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( // // a[i] << 16, a[i + k] << 16 -> a bigger load. if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) - return None; + return std::nullopt; Loads.insert(Load); // Keep track of the position of the earliest/latest loads in the pattern. @@ -3421,9 +3491,9 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( if (Loads.count(&MI)) continue; if (MI.isLoadFoldBarrier()) - return None; + return std::nullopt; if (Iter++ == MaxIter) - return None; + return std::nullopt; } return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad); @@ -3487,7 +3557,7 @@ bool CombinerHelper::matchLoadOrCombine( // pattern. If it does, then we can represent it using a load + possibly a // BSWAP. bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); - Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); + std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); if (!IsBigEndian) return false; bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; @@ -3527,7 +3597,7 @@ bool CombinerHelper::matchLoadOrCombine( // Load must be allowed and fast on the target. LLVMContext &C = MF.getFunction().getContext(); auto &DL = MF.getDataLayout(); - bool Fast = false; + unsigned Fast = 0; if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || !Fast) return false; @@ -3548,11 +3618,12 @@ bool CombinerHelper::matchLoadOrCombine( /// value found. /// On match, returns the start byte offset of the \p SrcVal that is being /// stored. -static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal, - MachineRegisterInfo &MRI) { +static std::optional<int64_t> +getTruncStoreByteOffset(GStore &Store, Register &SrcVal, + MachineRegisterInfo &MRI) { Register TruncVal; if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) - return None; + return std::nullopt; // The shift amount must be a constant multiple of the narrow type. // It is translated to the offset address in the wide source value "y". @@ -3570,21 +3641,21 @@ static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal SrcVal = TruncVal; return 0; // If it's the lowest index store. 
} - return None; + return std::nullopt; } unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); if (ShiftAmt % NarrowBits!= 0) - return None; + return std::nullopt; const unsigned Offset = ShiftAmt / NarrowBits; if (SrcVal.isValid() && FoundSrcVal != SrcVal) - return None; + return std::nullopt; if (!SrcVal.isValid()) SrcVal = FoundSrcVal; else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) - return None; + return std::nullopt; return Offset; } @@ -3732,7 +3803,7 @@ bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI, const auto &DL = LastStore.getMF()->getDataLayout(); auto &C = LastStore.getMF()->getFunction().getContext(); // Check that a store of the wide type is both allowed and fast on the target - bool Fast = false; + unsigned Fast = 0; bool Allowed = getTargetLowering().allowsMemoryAccess( C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); if (!Allowed || !Fast) @@ -3917,33 +3988,30 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, // and find the source register that the index maps to. Register SrcVec = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(SrcVec); - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}})) - return false; auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements()) return false; unsigned VecIdx = Cst->Value.getZExtValue(); - MachineInstr *BuildVecMI = - getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI); - if (!BuildVecMI) { - BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI); - if (!BuildVecMI) - return false; - LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg()); - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}})) - return false; + + // Check if we have a build_vector or build_vector_trunc with an optional + // trunc in front. + MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec); + if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) { + SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg()); } + if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR && + SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC) + return false; + EVT Ty(getMVTForLLT(SrcTy)); if (!MRI.hasOneNonDBGUse(SrcVec) && !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) return false; - Reg = BuildVecMI->getOperand(VecIdx + 1).getReg(); + Reg = SrcVecMI->getOperand(VecIdx + 1).getReg(); return true; } @@ -4146,7 +4214,7 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg()); auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg()); - Optional<bool> KnownVal; + std::optional<bool> KnownVal; switch (Pred) { default: llvm_unreachable("Unexpected G_ICMP predicate?"); @@ -4542,7 +4610,7 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C) // if and only if (G_PTR_ADD X, C) has one use. Register LHSBase; - Optional<ValueAndVReg> LHSCstOff; + std::optional<ValueAndVReg> LHSCstOff; if (!mi_match(MI.getBaseReg(), MRI, m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff))))) return false; @@ -4554,8 +4622,10 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, // doesn't happen. 
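The reassociation performed here relies on the offset additions being associative and commutative: (X + C) + Y and (X + Y) + C address the same location, so the constant can be moved to the outer add where later folds (for example into addressing modes) can see it. A tiny standalone illustration using plain integer addresses (an assumption of the sketch; G_PTR_ADD itself operates on pointer and index types):

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t X = 0x1000; // base pointer value
  int64_t C = 16;       // constant inner offset
  int64_t Y = 128;      // variable outer offset

  uintptr_t Inner = (X + C) + Y; // G_PTR_ADD (G_PTR_ADD X, C), Y
  uintptr_t Outer = (X + Y) + C; // G_PTR_ADD (G_PTR_ADD X, Y), C
  assert(Inner == Outer);
  return 0;
}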
LHSPtrAdd->moveBefore(&MI); Register RHSReg = MI.getOffsetReg(); + // set VReg will cause type mismatch if it comes from extend/trunc + auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value); Observer.changingInstr(MI); - MI.getOperand(2).setReg(LHSCstOff->VReg); + MI.getOperand(2).setReg(NewCst.getReg(0)); Observer.changedInstr(MI); Observer.changingInstr(*LHSPtrAdd); LHSPtrAdd->getOperand(2).setReg(RHSReg); @@ -4781,6 +4851,83 @@ bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { return true; } +bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) { + // (G_*ADDE x, y, 0) -> (G_*ADDO x, y) + // (G_*SUBE x, y, 0) -> (G_*SUBO x, y) + assert(MI.getOpcode() == TargetOpcode::G_UADDE || + MI.getOpcode() == TargetOpcode::G_SADDE || + MI.getOpcode() == TargetOpcode::G_USUBE || + MI.getOpcode() == TargetOpcode::G_SSUBE); + if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0))) + return false; + MatchInfo = [&](MachineIRBuilder &B) { + unsigned NewOpcode; + switch (MI.getOpcode()) { + case TargetOpcode::G_UADDE: + NewOpcode = TargetOpcode::G_UADDO; + break; + case TargetOpcode::G_SADDE: + NewOpcode = TargetOpcode::G_SADDO; + break; + case TargetOpcode::G_USUBE: + NewOpcode = TargetOpcode::G_USUBO; + break; + case TargetOpcode::G_SSUBE: + NewOpcode = TargetOpcode::G_SSUBO; + break; + } + Observer.changingInstr(MI); + MI.setDesc(B.getTII().get(NewOpcode)); + MI.removeOperand(4); + Observer.changedInstr(MI); + }; + return true; +} + +bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SUB); + Register Dst = MI.getOperand(0).getReg(); + // (x + y) - z -> x (if y == z) + // (x + y) - z -> y (if x == z) + Register X, Y, Z; + if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) { + Register ReplaceReg; + int64_t CstX, CstY; + if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) && + mi_match(Z, MRI, m_SpecificICstOrSplat(CstY)))) + ReplaceReg = X; + else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) && + mi_match(Z, MRI, m_SpecificICstOrSplat(CstX)))) + ReplaceReg = Y; + if (ReplaceReg) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); }; + return true; + } + } + + // x - (y + z) -> 0 - y (if x == z) + // x - (y + z) -> 0 - z (if x == y) + if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) { + Register ReplaceReg; + int64_t CstX; + if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) && + mi_match(Z, MRI, m_SpecificICstOrSplat(CstX)))) + ReplaceReg = Y; + else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) && + mi_match(Y, MRI, m_SpecificICstOrSplat(CstX)))) + ReplaceReg = Z; + if (ReplaceReg) { + MatchInfo = [=](MachineIRBuilder &B) { + auto Zero = B.buildConstant(MRI.getType(Dst), 0); + B.buildSub(Dst, Zero, ReplaceReg); + }; + return true; + } + } + return false; +} + MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UDIV); auto &UDiv = cast<GenericMachineInstr>(MI); @@ -4801,34 +4948,33 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { auto BuildUDIVPattern = [&](const Constant *C) { auto *CI = cast<ConstantInt>(C); const APInt &Divisor = CI->getValue(); - UnsignedDivisionByConstantInfo magics = - UnsignedDivisionByConstantInfo::get(Divisor); + + bool SelNPQ = false; + APInt Magic(Divisor.getBitWidth(), 0); unsigned PreShift = 0, PostShift = 0; - // If the divisor is even, we can avoid using the 
expensive fixup by - // shifting the divided value upfront. - if (magics.IsAdd && !Divisor[0]) { - PreShift = Divisor.countTrailingZeros(); - // Get magic number for the shifted divisor. - magics = - UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(!magics.IsAdd && "Should use cheap fixup now"); - } + // Magic algorithm doesn't work for division by 1. We need to emit a select + // at the end. + // TODO: Use undef values for divisor of 1. + if (!Divisor.isOneValue()) { + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); - unsigned SelNPQ; - if (!magics.IsAdd || Divisor.isOneValue()) { - assert(magics.ShiftAmount < Divisor.getBitWidth() && + Magic = std::move(magics.Magic); + + assert(magics.PreShift < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); - PostShift = magics.ShiftAmount; - SelNPQ = false; - } else { - PostShift = magics.ShiftAmount - 1; - SelNPQ = true; + assert(magics.PostShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift"); + PreShift = magics.PreShift; + PostShift = magics.PostShift; + SelNPQ = magics.IsAdd; } PreShifts.push_back( MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0)); - MagicFactors.push_back(MIB.buildConstant(ScalarTy, magics.Magic).getReg(0)); + MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0)); NPQFactors.push_back( MIB.buildConstant(ScalarTy, SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) @@ -4935,6 +5081,108 @@ void CombinerHelper::applyUDivByConst(MachineInstr &MI) { replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); } +bool CombinerHelper::matchSDivByConst(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); + Register Dst = MI.getOperand(0).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(Dst); + + auto &MF = *MI.getMF(); + AttributeList Attr = MF.getFunction().getAttributes(); + const auto &TLI = getTargetLowering(); + LLVMContext &Ctx = MF.getFunction().getContext(); + auto &DL = MF.getDataLayout(); + if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr)) + return false; + + // Don't do this for minsize because the instruction sequence is usually + // larger. + if (MF.getFunction().hasMinSize()) + return false; + + // If the sdiv has an 'exact' flag we can use a simpler lowering. + if (MI.getFlag(MachineInstr::MIFlag::IsExact)) { + return matchUnaryPredicate( + MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); }); + } + + // Don't support the general case for now. 
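The 'exact' G_SDIV path matched just above (and realized in buildSDivUsingMul below) rests on a standard identity: an odd divisor d has a multiplicative inverse modulo 2^W, so when x is known to be an exact multiple of d, x / d equals x * d_inv in W-bit arithmetic; an even divisor is first reduced by an arithmetic shift. A small self-contained check of the identity for d = 3 at W = 32 (illustrative constants, not code from this patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t D = 3;
  const uint32_t DInv = 0xAAAAAAABu; // multiplicative inverse of 3 modulo 2^32
  assert(uint32_t(D * DInv) == 1u);  // d * d_inv == 1 (mod 2^32)

  for (int32_t Q = -1000; Q <= 1000; ++Q) {
    int32_t X = Q * int32_t(D);                   // exact multiple of d
    int32_t ViaMul = int32_t(uint32_t(X) * DInv); // x * d_inv (mod 2^32)
    assert(ViaMul == X / int32_t(D));
  }
  return 0;
}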
+ return false; +} + +void CombinerHelper::applySDivByConst(MachineInstr &MI) { + auto *NewMI = buildSDivUsingMul(MI); + replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); +} + +MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); + auto &SDiv = cast<GenericMachineInstr>(MI); + Register Dst = SDiv.getReg(0); + Register LHS = SDiv.getReg(1); + Register RHS = SDiv.getReg(2); + LLT Ty = MRI.getType(Dst); + LLT ScalarTy = Ty.getScalarType(); + LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); + auto &MIB = Builder; + MIB.setInstrAndDebugLoc(MI); + + bool UseSRA = false; + SmallVector<Register, 16> Shifts, Factors; + + auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI)); + bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value(); + + auto BuildSDIVPattern = [&](const Constant *C) { + // Don't recompute inverses for each splat element. + if (IsSplat && !Factors.empty()) { + Shifts.push_back(Shifts[0]); + Factors.push_back(Factors[0]); + return true; + } + + auto *CI = cast<ConstantInt>(C); + APInt Divisor = CI->getValue(); + unsigned Shift = Divisor.countTrailingZeros(); + if (Shift) { + Divisor.ashrInPlace(Shift); + UseSRA = true; + } + + // Calculate the multiplicative inverse modulo BW. + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = Divisor.getBitWidth(); + APInt Factor = Divisor.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); + Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); + return true; + }; + + // Collect all magic values from the build vector. + bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern); + (void)Matched; + assert(Matched && "Expected unary predicate match to succeed"); + + Register Shift, Factor; + if (Ty.isVector()) { + Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0); + Factor = MIB.buildBuildVector(Ty, Factors).getReg(0); + } else { + Shift = Shifts[0]; + Factor = Factors[0]; + } + + Register Res = LHS; + + if (UseSRA) + Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0); + + return MIB.buildMul(Ty, Res, Factor); +} + bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UMULH); Register RHS = MI.getOperand(2).getReg(); @@ -5014,6 +5262,38 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } +bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + Register LHS = MI.getOperand(1).getReg(); + MatchInfo = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + const auto LHSCst = Ty.isVector() + ? getFConstantSplat(LHS, MRI, /* allowUndef */ true) + : getFConstantVRegValWithLookThrough(LHS, MRI); + if (!LHSCst) + return false; + + // -0.0 is always allowed + if (LHSCst->Value.isNegZero()) + return true; + + // +0.0 is only allowed if nsz is set. 
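The zero-constant distinction above follows IEEE-754 addition rules: (-0.0) - x is bit-identical to -x for every x, but (+0.0) - (+0.0) is +0.0 while -(+0.0) is -0.0, so folding from a +0.0 LHS is only sound when the instruction carries the nsz flag. A short host-float demonstration of the corner case, assuming the host uses IEEE-754 doubles (illustrative only, not part of the patch):

#include <cassert>
#include <cmath>

int main() {
  double X = 0.0; // +0.0

  double Sub = -0.0 - X; // fsub -0.0, x behaves like fneg x, even for x == +0.0
  assert(std::signbit(Sub) && Sub == 0.0);

  double SubPos = 0.0 - X; // fsub +0.0, +0.0 yields +0.0 ...
  double Neg = -X;         // ... but fneg(+0.0) yields -0.0
  assert(!std::signbit(SubPos) && std::signbit(Neg));
  return 0;
}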
+ if (LHSCst->Value.isPosZero()) + return MI.getFlag(MachineInstr::FmNsz); + + return false; +} + +void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + Register Dst = MI.getOperand(0).getReg(); + Builder.buildFNeg( + Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0)); + eraseInst(MI); +} + /// Checks if \p MI is TargetOpcode::G_FMUL and contractable either /// due to global flags or MachineInstr flags. static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) { @@ -5045,7 +5325,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, return false; // Floating-point multiply-add with intermediate rounding. - HasFMAD = (LI && TLI.isFMADLegal(MI, DstType)); + HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) && isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}}); @@ -5670,6 +5950,241 @@ bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) { return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); } +bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI, + Register &MatchInfo) { + // This combine folds the following patterns: + // + // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k)) + // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k))) + // into + // x + // if + // k == sizeof(VecEltTy)/2 + // type(x) == type(dst) + // + // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef) + // into + // x + // if + // type(x) == type(dst) + + LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg()); + LLT DstEltTy = DstVecTy.getElementType(); + + Register Lo, Hi; + + if (mi_match( + MI, MRI, + m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) { + MatchInfo = Lo; + return MRI.getType(MatchInfo) == DstVecTy; + } + + std::optional<ValueAndVReg> ShiftAmount; + const auto LoPattern = m_GBitcast(m_Reg(Lo)); + const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount)); + if (mi_match( + MI, MRI, + m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern), + m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) { + if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) { + MatchInfo = Lo; + return MRI.getType(MatchInfo) == DstVecTy; + } + } + + return false; +} + +bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI, + Register &MatchInfo) { + // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x + // if type(x) == type(G_TRUNC) + if (!mi_match(MI.getOperand(1).getReg(), MRI, + m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg())))) + return false; + + return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg()); +} + +bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI, + Register &MatchInfo) { + // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with + // y if K == size of vector element type + std::optional<ValueAndVReg> ShiftAmt; + if (!mi_match(MI.getOperand(1).getReg(), MRI, + m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))), + m_GCst(ShiftAmt)))) + return false; + + LLT MatchTy = MRI.getType(MatchInfo); + return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() && + MatchTy == MRI.getType(MI.getOperand(0).getReg()); +} + +unsigned CombinerHelper::getFPMinMaxOpcForSelect( + CmpInst::Predicate Pred, LLT DstTy, + SelectPatternNaNBehaviour VsNaNRetVal) const { + assert(VsNaNRetVal != 
SelectPatternNaNBehaviour::NOT_APPLICABLE && + "Expected a NaN behaviour?"); + // Choose an opcode based off of legality or the behaviour when one of the + // LHS/RHS may be NaN. + switch (Pred) { + default: + return 0; + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_UGE: + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_OGE: + if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER) + return TargetOpcode::G_FMAXNUM; + if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN) + return TargetOpcode::G_FMAXIMUM; + if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}})) + return TargetOpcode::G_FMAXNUM; + if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}})) + return TargetOpcode::G_FMAXIMUM; + return 0; + case CmpInst::FCMP_ULT: + case CmpInst::FCMP_ULE: + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_OLE: + if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER) + return TargetOpcode::G_FMINNUM; + if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN) + return TargetOpcode::G_FMINIMUM; + if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}})) + return TargetOpcode::G_FMINNUM; + if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}})) + return 0; + return TargetOpcode::G_FMINIMUM; + } +} + +CombinerHelper::SelectPatternNaNBehaviour +CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS, + bool IsOrderedComparison) const { + bool LHSSafe = isKnownNeverNaN(LHS, MRI); + bool RHSSafe = isKnownNeverNaN(RHS, MRI); + // Completely unsafe. + if (!LHSSafe && !RHSSafe) + return SelectPatternNaNBehaviour::NOT_APPLICABLE; + if (LHSSafe && RHSSafe) + return SelectPatternNaNBehaviour::RETURNS_ANY; + // An ordered comparison will return false when given a NaN, so it + // returns the RHS. + if (IsOrderedComparison) + return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN + : SelectPatternNaNBehaviour::RETURNS_OTHER; + // An unordered comparison will return true when given a NaN, so it + // returns the LHS. + return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER + : SelectPatternNaNBehaviour::RETURNS_NAN; +} + +bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond, + Register TrueVal, Register FalseVal, + BuildFnTy &MatchInfo) { + // Match: select (fcmp cond x, y) x, y + // select (fcmp cond x, y) y, x + // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition. + LLT DstTy = MRI.getType(Dst); + // Bail out early on pointers, since we'll never want to fold to a min/max. + if (DstTy.isPointer()) + return false; + // Match a floating point compare with a less-than/greater-than predicate. + // TODO: Allow multiple users of the compare if they are all selects. 
+ CmpInst::Predicate Pred; + Register CmpLHS, CmpRHS; + if (!mi_match(Cond, MRI, + m_OneNonDBGUse( + m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) || + CmpInst::isEquality(Pred)) + return false; + SelectPatternNaNBehaviour ResWithKnownNaNInfo = + computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred)); + if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE) + return false; + if (TrueVal == CmpRHS && FalseVal == CmpLHS) { + std::swap(CmpLHS, CmpRHS); + Pred = CmpInst::getSwappedPredicate(Pred); + if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN) + ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER; + else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER) + ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN; + } + if (TrueVal != CmpLHS || FalseVal != CmpRHS) + return false; + // Decide what type of max/min this should be based off of the predicate. + unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo); + if (!Opc || !isLegal({Opc, {DstTy}})) + return false; + // Comparisons between signed zero and zero may have different results... + // unless we have fmaximum/fminimum. In that case, we know -0 < 0. + if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) { + // We don't know if a comparison between two 0s will give us a consistent + // result. Be conservative and only proceed if at least one side is + // non-zero. + auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI); + if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) { + KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI); + if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) + return false; + } + } + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS}); + }; + return true; +} + +bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI, + BuildFnTy &MatchInfo) { + // TODO: Handle integer cases. + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + // Condition may be fed by a truncated compare. + Register Cond = MI.getOperand(1).getReg(); + Register MaybeTrunc; + if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc))))) + Cond = MaybeTrunc; + Register Dst = MI.getOperand(0).getReg(); + Register TrueVal = MI.getOperand(2).getReg(); + Register FalseVal = MI.getOperand(3).getReg(); + return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo); +} + +bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ICMP); + // (X + Y) == X --> Y == 0 + // (X + Y) != X --> Y != 0 + // (X - Y) == X --> Y == 0 + // (X - Y) != X --> Y != 0 + // (X ^ Y) == X --> Y == 0 + // (X ^ Y) != X --> Y != 0 + Register Dst = MI.getOperand(0).getReg(); + CmpInst::Predicate Pred; + Register X, Y, OpLHS, OpRHS; + bool MatchedSub = mi_match( + Dst, MRI, + m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y)))); + if (MatchedSub && X != OpLHS) + return false; + if (!MatchedSub) { + if (!mi_match(Dst, MRI, + m_c_GICmp(m_Pred(Pred), m_Reg(X), + m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)), + m_GXor(m_Reg(OpLHS), m_Reg(OpRHS)))))) + return false; + Y = X == OpLHS ? OpRHS : X == OpRHS ? 
OpLHS : Register(); + } + MatchInfo = [=](MachineIRBuilder &B) { + auto Zero = B.buildConstant(MRI.getType(Y), 0); + B.buildICmp(Pred, Dst, Y, Zero); + }; + return CmpInst::isEquality(Pred) && Y.isValid(); +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 4f03af0fce82..bfbe7e1c3e55 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -39,8 +39,7 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { return computeKnownAlignment(MI->getOperand(1).getReg(), Depth); case TargetOpcode::G_ASSERT_ALIGN: { // TODO: Min with source - int64_t LogAlign = MI->getOperand(2).getImm(); - return Align(1ull << LogAlign); + return Align(MI->getOperand(2).getImm()); } case TargetOpcode::G_FRAME_INDEX: { int FrameIdx = MI->getOperand(1).getIndex(); @@ -286,7 +285,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, LLT Ty = MRI.getType(MI.getOperand(1).getReg()); if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case TargetOpcode::G_ADD: { computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, @@ -447,7 +446,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, if (DstTy.isVector()) break; // Fall through and handle them the same as zext/trunc. - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetOpcode::G_ASSERT_ZEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_TRUNC: { @@ -472,9 +471,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_ASSERT_ALIGN: { - int64_t LogOfAlign = MI.getOperand(2).getImm(); - if (LogOfAlign == 0) - break; + int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm()); // TODO: Should use maximum with source // If a node is guaranteed to be aligned, set low zero bits accordingly as @@ -533,7 +530,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // We can bound the space the count needs. Also, bits known to be zero can't // contribute to the population. unsigned BitsPossiblySet = Known2.countMaxPopulation(); - unsigned LowBits = Log2_32(BitsPossiblySet)+1; + unsigned LowBits = llvm::bit_width(BitsPossiblySet); Known.Zero.setBitsFrom(LowBits); // TODO: we could bound Known.One using the lower bound on the number of // bits which might be set provided by popcnt KnownOne2. @@ -714,6 +711,18 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, break; } + case TargetOpcode::G_FCMP: + case TargetOpcode::G_ICMP: { + bool IsFP = Opcode == TargetOpcode::G_FCMP; + if (TyBits == 1) + break; + auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP); + if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) + return TyBits; // All bits are sign bits. + if (BC == TargetLowering::ZeroOrOneBooleanContent) + return TyBits - 1; // Every always-zero bit is a sign bit. 
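The two returns above encode how a target materializes a compare result in a wider type: with ZeroOrNegativeOneBooleanContent the value is 0 or all-ones, so every bit is a copy of the sign bit (TyBits sign bits); with ZeroOrOneBooleanContent it is 0 or 1, so the guaranteed minimum over both values is TyBits - 1 sign bits. A plain-integer sketch of that count for an 8-bit result type (illustrative, outside the patch):

#include <cassert>
#include <cstdint>

// Number of leading bits that are copies of the sign bit (sign bit included).
static unsigned countSignBits(int8_t V) {
  unsigned N = 1;
  for (int Bit = 6; Bit >= 0 && ((V >> Bit) & 1) == ((V >> 7) & 1); --Bit)
    ++N;
  return N;
}

int main() {
  // ZeroOrNegativeOneBooleanContent: results are 0 or -1, all bits are sign bits.
  assert(countSignBits(0) == 8 && countSignBits(-1) == 8);
  // ZeroOrOneBooleanContent: results are 0 or 1, guaranteed minimum is 7 == TyBits - 1.
  assert(countSignBits(0) == 8 && countSignBits(1) == 7);
  return 0;
}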
+ break; + } case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: default: { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 2f9187bbf2ad..7d811dc0ad8f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -16,7 +16,9 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -61,6 +63,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -81,6 +84,7 @@ #include <cassert> #include <cstdint> #include <iterator> +#include <optional> #include <string> #include <utility> #include <vector> @@ -167,6 +171,7 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); if (OptLevel != CodeGenOpt::None) { AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); @@ -1064,7 +1069,7 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, LLT SwitchTy = getLLTForMVT(BB.RegVT); Register Cmp; - unsigned PopCount = countPopulation(B.Mask); + unsigned PopCount = llvm::popcount(B.Mask); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. @@ -1301,16 +1306,12 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { } auto &TLI = *MF->getSubtarget().getTargetLowering(); - MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL); + MachineMemOperand::Flags Flags = + TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo); if (AA && !(Flags & MachineMemOperand::MOInvariant)) { if (AA->pointsToConstantMemory( MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) { Flags |= MachineMemOperand::MOInvariant; - - // FIXME: pointsToConstantMemory probably does not imply dereferenceable, - // but the previous usage implied it did. Probably should check - // isDereferenceableAndAlignedPointer. 
- Flags |= MachineMemOperand::MODereferenceable; } } @@ -1882,10 +1883,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) { if (ORE->enabled()) { - const Function &F = *MI->getParent()->getParent(); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - if (MemoryOpRemark::canHandle(MI, TLI)) { - MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI); + if (MemoryOpRemark::canHandle(MI, *LibInfo)) { + MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo); R.visit(MI); } } @@ -2301,7 +2300,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // Convert the metadata argument to a constant integer Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata(); - Optional<RoundingMode> RoundMode = + std::optional<RoundingMode> RoundMode = convertStrToRoundingMode(cast<MDString>(MD)->getString()); // Add the Rounding mode as an integer @@ -2313,6 +2312,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } + case Intrinsic::is_fpclass: { + Value *FpValue = CI.getOperand(0); + ConstantInt *TestMaskValue = cast<ConstantInt>(CI.getOperand(1)); + + MIRBuilder + .buildInstr(TargetOpcode::G_IS_FPCLASS, {getOrCreateVReg(CI)}, + {getOrCreateVReg(*FpValue)}) + .addImm(TestMaskValue->getZExtValue()); + + return true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -2352,7 +2362,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB, SwiftInVReg = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( &CB, &MIRBuilder.getMBB(), Arg)); - Args.emplace_back(makeArrayRef(SwiftInVReg)); + Args.emplace_back(ArrayRef(SwiftInVReg)); SwiftErrorVReg = SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg); continue; @@ -2362,10 +2372,8 @@ bool IRTranslator::translateCallBase(const CallBase &CB, if (auto *CI = dyn_cast<CallInst>(&CB)) { if (ORE->enabled()) { - const Function &F = *CI->getParent()->getParent(); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - if (MemoryOpRemark::canHandle(CI, TLI)) { - MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI); + if (MemoryOpRemark::canHandle(CI, *LibInfo)) { + MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo); R.visit(CI); } } @@ -2403,6 +2411,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget)) return false; + // FIXME: support statepoints and related. + if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U)) + return false; + if (CI.isInlineAsm()) return translateInlineAsm(CI, MIRBuilder); @@ -2475,8 +2487,16 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { LLT MemTy = Info.memVT.isSimple() ? getLLTForMVT(Info.memVT.getSimpleVT()) : LLT::scalar(Info.memVT.getStoreSizeInBits()); - MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, MemTy, Alignment)); + + // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic + // didn't yield anything useful. 
+ MachinePointerInfo MPI; + if (Info.ptrVal) + MPI = MachinePointerInfo(Info.ptrVal, Info.offset); + else if (Info.fallbackAddressSpace) + MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + MIB.addMemOperand( + MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata())); } return true; @@ -2566,14 +2586,12 @@ bool IRTranslator::translateInvoke(const User &U, bool LowerInlineAsm = I.isInlineAsm(); bool NeedEHLabel = true; - // If it can't throw then use a fast-path without emitting EH labels. - if (LowerInlineAsm) - NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow(); // Emit the actual call, bracketed by EH_LABELs so that the MF knows about // the region covered by the try. MCSymbol *BeginSymbol = nullptr; if (NeedEHLabel) { + MIRBuilder.buildInstr(TargetOpcode::G_INVOKE_REGION_START); BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); } @@ -2808,7 +2826,7 @@ bool IRTranslator::translateExtractElement(const User &U, Register Idx; if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { - APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth); + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); Idx = getOrCreateVReg(*NewIdxCI); } @@ -2817,7 +2835,7 @@ bool IRTranslator::translateExtractElement(const User &U, Idx = getOrCreateVReg(*U.getOperand(1)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); - Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0); + Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; @@ -2934,6 +2952,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::FMin: Opcode = TargetOpcode::G_ATOMICRMW_FMIN; break; + case AtomicRMWInst::UIncWrap: + Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP; + break; + case AtomicRMWInst::UDecWrap: + Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP; + break; } MIRBuilder.buildAtomicRMW( @@ -3003,6 +3027,7 @@ void IRTranslator::finishPendingPhis() { bool IRTranslator::translate(const Instruction &Inst) { CurBuilder->setDebugLoc(Inst.getDebugLoc()); + CurBuilder->setPCSections(Inst.getMetadata(LLVMContext::MD_pcsections)); auto &TLI = *MF->getSubtarget().getTargetLowering(); if (TLI.fallBackToDAGISel(Inst)) @@ -3393,6 +3418,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { FuncInfo.BPI = nullptr; } + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( + MF->getFunction()); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); const auto &TLI = *MF->getSubtarget().getTargetLowering(); @@ -3437,7 +3465,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF->push_back(MBB); if (BB.hasAddressTaken()) - MBB->setHasAddressTaken(); + MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB)); if (!HasMustTailInVarArgFn) HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 28f3b425c67d..f780050ca3f1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -160,6 +160,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // If so, erase it. if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << "Is dead; erasing.\n"); + salvageDebugInfo(MRI, MI); MI.eraseFromParent(); continue; } @@ -183,6 +184,11 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } + if (MI.getOpcode() == TargetOpcode::G_INVOKE_REGION_START) { + MI.eraseFromParent(); + continue; + } + if (!ISel->select(MI)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. @@ -229,8 +235,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; Register SrcReg = MI.getOperand(1).getReg(); Register DstReg = MI.getOperand(0).getReg(); - if (Register::isVirtualRegister(SrcReg) && - Register::isVirtualRegister(DstReg)) { + if (SrcReg.isVirtual() && DstReg.isVirtual()) { auto SrcRC = MRI.getRegClass(SrcReg); auto DstRC = MRI.getRegClass(DstReg); if (SrcRC == DstRC) { @@ -247,7 +252,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // that the size of the now-constrained vreg is unchanged and that it has a // register class. for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned VReg = Register::index2VirtReg(I); + Register VReg = Register::index2VirtReg(I); MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp index 6271a4514c27..8cfb1b786c24 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp @@ -264,7 +264,7 @@ LegacyLegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Siz // Special case for scalarization: if (Vec == SizeAndActionsVec({{1, FewerElements}})) return {1, FewerElements}; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NarrowScalar: { // The following needs to be a loop, as for now, we do allow needing to // go over "Unsupported" bit sizes before finding a legalizable bit size. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index f09e5b7ce783..1a13f39c100c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -225,6 +225,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { + salvageDebugInfo(MRI, MI); eraseInstr(MI, MRI, &LocObserver); continue; } @@ -272,6 +273,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { + salvageDebugInfo(MRI, MI); eraseInstr(MI, MRI, &LocObserver); continue; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 52ee13757f27..8a1fce2d3d65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -33,6 +33,8 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include <numeric> +#include <optional> #define DEBUG_TYPE "legalizer" @@ -233,7 +235,7 @@ void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts, // Requested sub-vectors of NarrowTy. for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) { ArrayRef<Register> Pieces(&Elts[Offset], NumElts); - VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0)); } // Leftover element(s). @@ -242,7 +244,8 @@ void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts, } else { LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy); ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts); - VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0)); + VRegs.push_back( + MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0)); } } @@ -255,7 +258,7 @@ void LegalizerHelper::insertParts(Register DstReg, assert(LeftoverRegs.empty()); if (!ResultTy.isVector()) { - MIRBuilder.buildMerge(DstReg, PartRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs); return; } @@ -304,7 +307,7 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg, else appendVectorElts(AllElts, Leftover); - MIRBuilder.buildMerge(DstReg, AllElts); + MIRBuilder.buildMergeLikeInstr(DstReg, AllElts); } /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. @@ -421,7 +424,7 @@ LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy, if (NumSubParts == 1) Remerge[I] = SubMerge[0]; else - Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0); + Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0); // In the sign extend padding case, re-use the first all-signbit merge. if (AllMergePartsArePadding && !AllPadReg) @@ -440,11 +443,11 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, // the result. 
if (DstTy == LCMTy) { - MIRBuilder.buildMerge(DstReg, RemergeRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs); return; } - auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs); + auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs); if (DstTy.isScalar() && LCMTy.isScalar()) { MIRBuilder.buildTrunc(DstReg, Remerge); return; @@ -458,7 +461,7 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); MIRBuilder.buildUnmerge(UnmergeDefs, - MIRBuilder.buildMerge(LCMTy, RemergeRegs)); + MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs)); return; } @@ -497,6 +500,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { } while (0) switch (Opcode) { + case TargetOpcode::G_MUL: + RTLIBCASE_INT(MUL_I); case TargetOpcode::G_SDIV: RTLIBCASE_INT(SDIV_I); case TargetOpcode::G_UDIV: @@ -795,6 +800,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_MUL: case TargetOpcode::G_SDIV: case TargetOpcode::G_UDIV: case TargetOpcode::G_SREM: @@ -936,7 +942,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (DstTy.isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -1008,7 +1014,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0)); } - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts); + MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts); MI.eraseFromParent(); return Legalized; } @@ -1169,7 +1175,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changingInstr(MI); for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB(); - MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward()); extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts, SrcRegs[i / 2]); } @@ -1183,7 +1189,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); } MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); - MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); + MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; @@ -1360,7 +1366,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // Gather the destination registers into the final destination. 
Register DstReg = MI.getOperand(0).getReg(); - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -1380,7 +1386,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstPart.getReg(0)); } - MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); + MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); @@ -1565,7 +1571,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, // %9:_(s6) = G_MERGE_VALUES %6, %7, %7 // %10:_(s12) = G_MERGE_VALUES %8, %9 - const int GCD = greatestCommonDivisor(SrcSize, WideSize); + const int GCD = std::gcd(SrcSize, WideSize); LLT GCDTy = LLT::scalar(GCD); SmallVector<Register, 8> Parts; @@ -1597,16 +1603,17 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, // Build merges of each piece. ArrayRef<Register> Slicer(Unmerges); for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) { - auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD)); + auto Merge = + MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD)); NewMergeRegs.push_back(Merge.getReg(0)); } // A truncate may be necessary if the requested type doesn't evenly divide the // original result type. if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) { - MIRBuilder.buildMerge(DstReg, NewMergeRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs); } else { - auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs); + auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs); MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0)); } @@ -1734,7 +1741,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, RemergeParts.emplace_back(Parts[Idx]); } - MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); + MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts); RemergeParts.clear(); } } @@ -1838,7 +1845,7 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { unsigned Opcode; unsigned ExtOpcode; - Optional<Register> CarryIn = None; + std::optional<Register> CarryIn; switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); @@ -1884,9 +1891,9 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false); Observer.changingInstr(MI); - widenScalarDst(MI, WideTy, 1); if (CarryIn) widenScalarSrc(MI, WideTy, 4, BoolExtOp); + widenScalarDst(MI, WideTy, 1); Observer.changedInstr(MI); return Legalized; @@ -2454,7 +2461,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changingInstr(MI); for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); - MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward()); widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); } @@ -2675,7 +2682,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { } else getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy); - MIRBuilder.buildMerge(Dst, SrcRegs); + MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs); MI.eraseFromParent(); return Legalized; } @@ -2683,7 +2690,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { if (DstTy.isVector()) { SmallVector<Register, 8> 
SrcRegs; getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType()); - MIRBuilder.buildMerge(Dst, SrcRegs); + MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs); MI.eraseFromParent(); return Legalized; } @@ -3315,7 +3322,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_FSUB: { + case TargetOpcode::G_FSUB: + case TargetOpcode::G_STRICT_FSUB: { Register Res = MI.getOperand(0).getReg(); LLT Ty = MRI.getType(Res); @@ -3326,9 +3334,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return UnableToLegalize; Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); - Register Neg = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildFNeg(Neg, RHS); - MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags()); + auto Neg = MIRBuilder.buildFNeg(Ty, RHS); + + if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB) + MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags()); + else + MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags()); + MI.eraseFromParent(); return Legalized; } @@ -3515,6 +3527,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerAbsToAddXor(MI); case G_SELECT: return lowerSelect(MI); + case G_IS_FPCLASS: + return lowerISFPCLASS(MI); case G_SDIVREM: case G_UDIVREM: return lowerDIVREM(MI); @@ -3748,7 +3762,7 @@ LegalizerHelper::fewerElementsVectorMultiEltType( mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]); } else { for (unsigned i = 0; i < NumDefs; ++i) - MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]); + MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]); } MI.eraseFromParent(); @@ -3773,7 +3787,7 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI, for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); UseIdx += 2, ++UseNo) { MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB(); - MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward()); extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]); } @@ -3796,7 +3810,7 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI, if (NumLeftovers) { mergeMixedSubvectors(MI.getReg(0), OutputRegs); } else { - MIRBuilder.buildMerge(MI.getReg(0), OutputRegs); + MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs); } MI.eraseFromParent(); @@ -3899,10 +3913,11 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces; ++i, Offset += NumNarrowTyElts) { ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts); - NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + NarrowTyElts.push_back( + MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0)); } - MIRBuilder.buildMerge(DstReg, NarrowTyElts); + MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts); MI.eraseFromParent(); return Legalized; } @@ -3930,10 +3945,11 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, SmallVector<Register, 8> Sources; for (unsigned j = 0; j < NumElts; ++j) Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg()); - NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0)); + NarrowTyElts.push_back( + MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0)); } - MIRBuilder.buildMerge(DstReg, NarrowTyElts); + MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts); 
MI.eraseFromParent(); return Legalized; } @@ -4214,10 +4230,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SSUBO: case G_SADDE: case G_SSUBE: + case G_STRICT_FADD: + case G_STRICT_FSUB: + case G_STRICT_FMUL: + case G_STRICT_FMA: return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/}); + case G_IS_FPCLASS: + return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/}); case G_SELECT: if (MRI.getType(MI.getOperand(1).getReg()).isVector()) return fewerElementsVectorMultiEltType(GMI, NumElts); @@ -4307,7 +4329,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( // The input vector this mask element indexes into. unsigned Input = (unsigned)Idx / NewElts; - if (Input >= array_lengthof(Inputs)) { + if (Input >= std::size(Inputs)) { // The mask element does not index into any input vector. Ops.push_back(-1); continue; @@ -4318,7 +4340,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( // Find or create a shuffle vector operand to hold this input. unsigned OpNo; - for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) { if (InputUsed[OpNo] == Input) { // This input vector is already an operand. break; @@ -4329,7 +4351,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( } } - if (OpNo >= array_lengthof(InputUsed)) { + if (OpNo >= std::size(InputUsed)) { // More than two input vectors used! Give up on trying to create a // shuffle vector. Insert all elements into a BUILD_VECTOR instead. UseBuildVector = true; @@ -4352,7 +4374,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( // The input vector this mask element indexes into. unsigned Input = (unsigned)Idx / NewElts; - if (Input >= array_lengthof(Inputs)) { + if (Input >= std::size(Inputs)) { // The mask element is "undef" or indexes off the end of the input. 
SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0)); continue; @@ -4570,7 +4592,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1)); if (Amt.isZero()) { - MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH}); + MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH}); MI.eraseFromParent(); return Legalized; } @@ -4643,7 +4665,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, } } - MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi}); + MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi}); MI.eraseFromParent(); return Legalized; @@ -4754,7 +4776,7 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, llvm_unreachable("not a shift"); } - MIRBuilder.buildMerge(DstReg, ResultRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs); MI.eraseFromParent(); return Legalized; } @@ -4820,7 +4842,10 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FMINNUM_IEEE: case TargetOpcode::G_FMAXNUM_IEEE: case TargetOpcode::G_FMINIMUM: - case TargetOpcode::G_FMAXIMUM: { + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_STRICT_FADD: + case TargetOpcode::G_STRICT_FSUB: + case TargetOpcode::G_STRICT_FMUL: { Observer.changingInstr(MI); moreElementsVectorSrc(MI, MoreTy, 1); moreElementsVectorSrc(MI, MoreTy, 2); @@ -4829,6 +4854,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, return Legalized; } case TargetOpcode::G_FMA: + case TargetOpcode::G_STRICT_FMA: case TargetOpcode::G_FSHR: case TargetOpcode::G_FSHL: { Observer.changingInstr(MI); @@ -4922,12 +4948,72 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, } } +/// Expand source vectors to the size of destination vector. +static LegalizerHelper::LegalizeResult +equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + unsigned MaskNumElts = Mask.size(); + unsigned SrcNumElts = SrcTy.getNumElements(); + Register DstReg = MI.getOperand(0).getReg(); + LLT DestEltTy = DstTy.getElementType(); + + // TODO: Normalize the shuffle vector since mask and vector length don't + // match. + if (MaskNumElts <= SrcNumElts) { + return LegalizerHelper::LegalizeResult::UnableToLegalize; + } + + unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); + unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; + LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy); + + // Create new source vectors by concatenating the initial + // source vectors with undefined vectors of the same size. + auto Undef = MIRBuilder.buildUndef(SrcTy); + SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0)); + SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0)); + MOps1[0] = MI.getOperand(1).getReg(); + MOps2[0] = MI.getOperand(2).getReg(); + + auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1); + auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2); + + // Readjust mask for new input vector length. 
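In a shuffle mask, indices 0..SrcNumElts-1 select from the first source and SrcNumElts..2*SrcNumElts-1 from the second; once each source has been widened to PaddedMaskNumElts with undef lanes, second-source indices must be bumped by PaddedMaskNumElts - SrcNumElts so they still point into the (now later) second operand. A small worked example with SrcNumElts = 2 and MaskNumElts = PaddedMaskNumElts = 4 (illustrative values, not taken from the patch):

#include <cassert>
#include <vector>

int main() {
  const unsigned SrcNumElts = 2;        // each original source is <2 x T>
  const unsigned PaddedMaskNumElts = 4; // sources widened to <4 x T> with undef

  // Original mask picks a[0], b[0], a[1], b[1]; b's lanes are indices 2 and 3.
  std::vector<int> Mask = {0, 2, 1, 3};

  std::vector<int> MappedOps(Mask.size(), -1);
  for (unsigned I = 0; I != Mask.size(); ++I) {
    int Idx = Mask[I];
    if (Idx >= int(SrcNumElts))
      Idx += PaddedMaskNumElts - SrcNumElts; // second source now starts at lane 4
    MappedOps[I] = Idx;
  }

  const std::vector<int> Expected = {0, 4, 1, 5};
  assert(MappedOps == Expected);
  return 0;
}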
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); + for (unsigned I = 0; I != MaskNumElts; ++I) { + int Idx = Mask[I]; + if (Idx >= static_cast<int>(SrcNumElts)) + Idx += PaddedMaskNumElts - SrcNumElts; + MappedOps[I] = Idx; + } + + // If we got more elements than required, extract subvector. + if (MaskNumElts != PaddedMaskNumElts) { + auto Shuffle = + MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps); + + SmallVector<Register, 16> Elts(MaskNumElts); + for (unsigned I = 0; I < MaskNumElts; ++I) { + Elts[I] = + MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I) + .getReg(0); + } + MIRBuilder.buildBuildVector(DstReg, Elts); + } else { + MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps); + } + + MI.eraseFromParent(); + return LegalizerHelper::LegalizeResult::Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, unsigned int TypeIdx, LLT MoreTy) { - if (TypeIdx != 0) - return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); Register Src1Reg = MI.getOperand(1).getReg(); Register Src2Reg = MI.getOperand(2).getReg(); @@ -4938,6 +5024,14 @@ LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, unsigned NumElts = DstTy.getNumElements(); unsigned WidenNumElts = MoreTy.getNumElements(); + if (DstTy.isVector() && Src1Ty.isVector() && + DstTy.getNumElements() > Src1Ty.getNumElements()) { + return equalizeVectorShuffleLengths(MI, MIRBuilder); + } + + if (TypeIdx != 0) + return UnableToLegalize; + // Expect a canonicalized shuffle. if (DstTy != Src1Ty || DstTy != Src2Ty) return UnableToLegalize; @@ -5115,8 +5209,8 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, CarryIn = CarryOut; } insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy, - makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy, - makeArrayRef(DstRegs).drop_front(NarrowParts)); + ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy, + ArrayRef(DstRegs).drop_front(NarrowParts)); MI.eraseFromParent(); return Legalized; @@ -5149,7 +5243,7 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { // Take only high half of registers if this is high mul. 
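// Stand-alone illustration of the slice taken on the next line, assuming an
// s128 G_UMULH narrowed to s32 pieces: the temporary product is built with
// twice as many parts as the result, and a "high" multiply keeps only the
// upper half, i.e. DstTmpRegs[DstTmpParts - NumParts .. DstTmpParts).
constexpr unsigned ExampleNumParts = 4;                  // s128 result in four s32 parts
constexpr unsigned ExampleTmpParts = 2 * ExampleNumParts; // full double-width product
static_assert(ExampleTmpParts - ExampleNumParts == 4,
              "high half is parts 4..7 of the temporary product");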
ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts); - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -5239,7 +5333,7 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, if (MRI.getType(DstReg).isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else if (DstRegs.size() > 1) - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs); else MIRBuilder.buildCopy(DstReg, DstRegs[0]); MI.eraseFromParent(); @@ -5321,10 +5415,10 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, Register DstReg = MI.getOperand(0).getReg(); if (WideSize > RegTy.getSizeInBits()) { Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize)); - MIRBuilder.buildMerge(MergeReg, DstRegs); + MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs); MIRBuilder.buildTrunc(DstReg, MergeReg); } else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; @@ -6582,7 +6676,7 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { if (InsertVal) { SrcRegs[IdxVal] = MI.getOperand(2).getReg(); - MIRBuilder.buildMerge(DstReg, SrcRegs); + MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs); } else { MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]); } @@ -6754,7 +6848,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { if (SubVectorElts.size() == 1) MIRBuilder.buildCopy(Dst, SubVectorElts[0]); else - MIRBuilder.buildMerge(Dst, SubVectorElts); + MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts); MI.eraseFromParent(); return Legalized; @@ -6827,7 +6921,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { DstElts.push_back(UnmergeSrc.getReg(Idx)); } - MIRBuilder.buildMerge(Dst, DstElts); + MIRBuilder.buildMergeLikeInstr(Dst, DstElts); MI.eraseFromParent(); return Legalized; } @@ -7216,6 +7310,166 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + uint64_t Mask = MI.getOperand(2).getImm(); + + if (Mask == 0) { + MIRBuilder.buildConstant(DstReg, 0); + MI.eraseFromParent(); + return Legalized; + } + if ((Mask & fcAllFlags) == fcAllFlags) { + MIRBuilder.buildConstant(DstReg, 1); + MI.eraseFromParent(); + return Legalized; + } + + unsigned BitSize = SrcTy.getScalarSizeInBits(); + const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType()); + + LLT IntTy = LLT::scalar(BitSize); + if (SrcTy.isVector()) + IntTy = LLT::vector(SrcTy.getElementCount(), IntTy); + auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg); + + // Various masks. + APInt SignBit = APInt::getSignMask(BitSize); + APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign. + APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit. 
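// Worked example of the masks being set up here (SignBit and ValueMask above,
// ExpMask, AllOneMantissa and QNaNBitMask just below), assuming IEEE binary32
// with 1 sign, 8 exponent and 23 mantissa bits; plain uint32_t stands in for
// APInt and the constants are not taken from the patch itself.
#include <cstdint>
static_assert((UINT32_C(1) << 31) == 0x80000000u, "SignBit");
static_assert((0xffffffffu ^ 0x80000000u) == 0x7fffffffu,
              "ValueMask: all bits but the sign");
static_assert((0xffu << 23) == 0x7f800000u,
              "Inf bit pattern == exponent mask");
static_assert((0x7f7fffffu & ~0x7f800000u) == 0x007fffffu,
              "AllOneMantissa: largest finite value minus the exponent bits");
static_assert((UINT32_C(1) << 22) == 0x00400000u,
              "QNaNBitMask: top mantissa bit");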
+ APInt ExpMask = Inf; + APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; + APInt QNaNBitMask = + APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); + APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits()); + + auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit); + auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask); + auto InfC = MIRBuilder.buildConstant(IntTy, Inf); + auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask); + auto ZeroC = MIRBuilder.buildConstant(IntTy, 0); + + auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC); + auto Sign = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs); + + auto Res = MIRBuilder.buildConstant(DstTy, 0); + const auto appendToRes = [&](MachineInstrBuilder ToAppend) { + Res = MIRBuilder.buildOr(DstTy, Res, ToAppend); + }; + + // Tests that involve more than one class should be processed first. + if ((Mask & fcFinite) == fcFinite) { + // finite(V) ==> abs(V) u< exp_mask + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs, + ExpMaskC)); + Mask &= ~fcFinite; + } else if ((Mask & fcFinite) == fcPosFinite) { + // finite(V) && V > 0 ==> V u< exp_mask + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt, + ExpMaskC)); + Mask &= ~fcPosFinite; + } else if ((Mask & fcFinite) == fcNegFinite) { + // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1 + auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs, + ExpMaskC); + auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign); + appendToRes(And); + Mask &= ~fcNegFinite; + } + + // Check for individual classes. + if (unsigned PartialCheck = Mask & fcZero) { + if (PartialCheck == fcPosZero) + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + AsInt, ZeroC)); + else if (PartialCheck == fcZero) + appendToRes( + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC)); + else // fcNegZero + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + AsInt, SignBitC)); + } + + if (unsigned PartialCheck = Mask & fcInf) { + if (PartialCheck == fcPosInf) + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + AsInt, InfC)); + else if (PartialCheck == fcInf) + appendToRes( + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC)); + else { // fcNegInf + APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt(); + auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf); + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + AsInt, NegInfC)); + } + } + + if (unsigned PartialCheck = Mask & fcNan) { + auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask); + if (PartialCheck == fcNan) { + // isnan(V) ==> abs(V) u> int(inf) + appendToRes( + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC)); + } else if (PartialCheck == fcQNan) { + // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit) + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs, + InfWithQnanBitC)); + } else { // fcSNan + // issignaling(V) ==> abs(V) u> unsigned(Inf) && + // abs(V) u< (unsigned(Inf) | quiet_bit) + auto IsNan = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC); + auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, + Abs, InfWithQnanBitC); + appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan)); + } + } + + if (unsigned PartialCheck = Mask & fcSubnormal) { + // issubnormal(V) 
==> unsigned(abs(V) - 1) u< (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set) + auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs; + auto OneC = MIRBuilder.buildConstant(IntTy, 1); + auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC); + auto SubnormalRes = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne, + MIRBuilder.buildConstant(IntTy, AllOneMantissa)); + if (PartialCheck == fcNegSubnormal) + SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign); + appendToRes(SubnormalRes); + } + + if (unsigned PartialCheck = Mask & fcNormal) { + // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u< + // (max_exp-1)) + APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); + auto ExpMinusOne = MIRBuilder.buildSub( + IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB)); + APInt MaxExpMinusOne = ExpMask - ExpLSB; + auto NormalRes = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne, + MIRBuilder.buildConstant(IntTy, MaxExpMinusOne)); + if (PartialCheck == fcNegNormal) + NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign); + else if (PartialCheck == fcPosNormal) { + auto PosSign = MIRBuilder.buildXor( + DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask)); + NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign); + } + appendToRes(NormalRes); + } + + MIRBuilder.buildCopy(DstReg, Res); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { // Implement vector G_SELECT in terms of XOR, AND, OR. Register DstReg = MI.getOperand(0).getReg(); @@ -7227,6 +7481,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { if (!DstTy.isVector()) return UnableToLegalize; + bool IsEltPtr = DstTy.getElementType().isPointer(); + if (IsEltPtr) { + LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits()); + LLT NewTy = DstTy.changeElementType(ScalarPtrTy); + Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0); + Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0); + DstTy = NewTy; + } + if (MaskTy.isScalar()) { // Turn the scalar condition into a vector condition mask. @@ -7234,10 +7497,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { // The condition was potentially zero extended before, but we want a sign // extended boolean. - if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() && - MaskTy != LLT::scalar(1)) { + if (MaskTy != LLT::scalar(1)) MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0); - } // Continue the sign extension (or truncate) to match the data type. MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(), @@ -7256,7 +7517,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); - MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); + if (IsEltPtr) { + auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2); + MIRBuilder.buildIntToPtr(DstReg, Or); + } else { + MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); + } MI.eraseFromParent(); return Legalized; } @@ -7378,7 +7644,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, // If the new LLT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. 
- bool Fast; + unsigned Fast; // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). MVT VT = getMVTForLLT(Ty); if (NumMemOps && Op.allowOverlap() && NewTySize < Size && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 6adb7ddb5b66..4b6c3a156709 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -126,7 +126,7 @@ static bool mutationIsSane(const LegalizeRule &Rule, case FewerElements: if (!OldTy.isVector()) return false; - LLVM_FALLTHROUGH; + [[fallthrough]]; case MoreElements: { // MoreElements can go from scalar to vector. const ElementCount OldElts = OldTy.isVector() ? @@ -296,7 +296,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list<unsigned> Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && + assert(Opcodes.size() >= 2 && "Initializer list must have at least two opcodes"); for (unsigned Op : llvm::drop_begin(Opcodes)) @@ -330,7 +330,7 @@ LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { SmallVector<LLT, 8> Types; SmallBitVector SeenTypes(8); - const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + ArrayRef<MCOperandInfo> OpInfo = MI.getDesc().operands(); // FIXME: probably we'll need to cache the results here somehow? for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { if (!OpInfo[i].isGenericType()) @@ -379,14 +379,14 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const { for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) { const MCInstrDesc &MCID = MII.get(Opcode); const unsigned NumTypeIdxs = std::accumulate( - MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + MCID.operands().begin(), MCID.operands().end(), 0U, [](unsigned Acc, const MCOperandInfo &OpInfo) { return OpInfo.isGenericType() ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) : Acc; }); const unsigned NumImmIdxs = std::accumulate( - MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + MCID.operands().begin(), MCID.operands().end(), 0U, [](unsigned Acc, const MCOperandInfo &OpInfo) { return OpInfo.isGenericImm() ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index be1bc865d1e1..7c6eac8c8ce0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -306,7 +306,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { bool AnyMerged = false; do { unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size()); - unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize(); + unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue(); // Compute the biggest store we can generate to handle the number of stores. 
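// Small stand-alone illustration of this sizing step, assuming five adjacent
// s8 stores are pending: only a power-of-two number of stores can be merged,
// so the widest candidate is 4 * 8 = 32 bits, and the loop below then walks
// 32 -> 16 -> 8 looking for the widest store type the target accepts.
constexpr unsigned powerOf2Floor(unsigned N) {
  unsigned P = 1;
  while (P * 2 <= N)
    P *= 2;
  return P;
}
static_assert(powerOf2Floor(5) == 4, "PowerOf2Floor(5)");
static_assert(powerOf2Floor(5) * 8 == 32, "MaxSizeBits for five s8 stores");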
unsigned MergeSizeBits; for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) { @@ -352,13 +352,13 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { const unsigned NumStores = Stores.size(); LLT SmallTy = MRI->getType(FirstStore->getValueReg()); LLT WideValueTy = - LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize()); + LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue()); // For each store, compute pairwise merged debug locs. - DebugLoc MergedLoc; - for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx) - MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(), - Stores[BIdx]->getDebugLoc()); + DebugLoc MergedLoc = Stores.front()->getDebugLoc(); + for (auto *Store : drop_begin(Stores)) + MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc()); + Builder.setInstr(*Stores.back()); Builder.setDebugLoc(MergedLoc); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index c1287693e74d..bf4dcc2c2459 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -181,6 +181,17 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { MI->removeFromParent(); MBB.insert(II, MI); Changed = true; + + // If the instruction (constant) being localized has single user, we can + // propagate debug location from user. + if (Users.size() == 1) { + const auto &DefDL = MI->getDebugLoc(); + const auto &UserDL = (*Users.begin())->getDebugLoc(); + + if ((!DefDL || DefDL.getLine() == 0) && UserDL && UserDL.getLine() != 0) { + MI->setDebugLoc(UserDL); + } + } } return Changed; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 2e22dae35e5a..9100e064f30f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -27,6 +27,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { State.MRI = &MF.getRegInfo(); State.TII = MF.getSubtarget().getInstrInfo(); State.DL = DebugLoc(); + State.PCSections = nullptr; State.II = MachineBasicBlock::iterator(); State.Observer = nullptr; } @@ -36,8 +37,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { //------------------------------------------------------------------------------ MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { - MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); - return MIB; + return BuildMI(getMF(), {getDL(), getPCSections()}, getTII().get(Opcode)); } MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { @@ -96,13 +96,23 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE); - if (auto *CI = dyn_cast<ConstantInt>(&C)) { + + auto *NumericConstant = [&] () -> const Constant* { + if (const auto *CE = dyn_cast<ConstantExpr>(&C)) + if (CE->getOpcode() == Instruction::IntToPtr) + return CE->getOperand(0); + return &C; + }(); + + if (auto *CI = dyn_cast<ConstantInt>(NumericConstant)) { if (CI->getBitWidth() > 64) MIB.addCImm(CI); else MIB.addImm(CI->getZExtValue()); - } else if (auto 
*CFP = dyn_cast<ConstantFP>(&C)) { + } else if (auto *CFP = dyn_cast<ConstantFP>(NumericConstant)) { MIB.addFPImm(CFP); + } else if (isa<ConstantPointerNull>(NumericConstant)) { + MIB.addImm(0); } else { // Insert $noreg if we didn't find a usable constant and had to drop it. MIB.addReg(Register()); @@ -187,7 +197,7 @@ MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res, return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}); } -Optional<MachineInstrBuilder> +std::optional<MachineInstrBuilder> MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a result argument"); @@ -195,7 +205,7 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, if (Value == 0) { Res = Op0; - return None; + return std::nullopt; } Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); @@ -233,7 +243,7 @@ MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res, unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size(); for (unsigned i = 0; i < NumberOfPadElts; ++i) Regs.push_back(Undef); - return buildMerge(Res, Regs); + return buildMergeLikeInstr(Res, Regs); } MachineInstrBuilder @@ -252,7 +262,7 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res, auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); for (unsigned i = 0; i < ResTy.getNumElements(); ++i) Regs.push_back(Unmerge.getReg(i)); - return buildMerge(Res, Regs); + return buildMergeLikeInstr(Res, Regs); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { @@ -587,8 +597,8 @@ MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) { return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {}); } -MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res, - ArrayRef<Register> Ops) { +MachineInstrBuilder MachineIRBuilder::buildMergeValues(const DstOp &Res, + ArrayRef<Register> Ops) { // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. @@ -598,10 +608,32 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res, } MachineInstrBuilder -MachineIRBuilder::buildMerge(const DstOp &Res, - std::initializer_list<SrcOp> Ops) { +MachineIRBuilder::buildMergeLikeInstr(const DstOp &Res, + ArrayRef<Register> Ops) { + // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. 
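// Sketch of the opcode selection this new "merge-like" builder performs (the
// real decision is in getOpcodeForMerge, a few lines further down); a plain
// C++ mirror rather than the builder itself:
enum class MergeOpc { MergeValues, BuildVector, ConcatVectors };
constexpr MergeOpc opcodeForMerge(bool DstIsVector, bool SrcIsVector) {
  if (DstIsVector)
    return SrcIsVector ? MergeOpc::ConcatVectors  // vector srcs -> G_CONCAT_VECTORS
                       : MergeOpc::BuildVector;   // scalar srcs -> G_BUILD_VECTOR
  return MergeOpc::MergeValues;                   // scalar dst  -> G_MERGE_VALUES
}
static_assert(opcodeForMerge(false, false) == MergeOpc::MergeValues, "");
static_assert(opcodeForMerge(true, false) == MergeOpc::BuildVector, "");
static_assert(opcodeForMerge(true, true) == MergeOpc::ConcatVectors, "");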
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + assert(TmpVec.size() > 1); + return buildInstr(getOpcodeForMerge(Res, TmpVec), Res, TmpVec); +} + +MachineInstrBuilder +MachineIRBuilder::buildMergeLikeInstr(const DstOp &Res, + std::initializer_list<SrcOp> Ops) { assert(Ops.size() > 1); - return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, Ops); + return buildInstr(getOpcodeForMerge(Res, Ops), Res, Ops); +} + +unsigned MachineIRBuilder::getOpcodeForMerge(const DstOp &DstOp, + ArrayRef<SrcOp> SrcOps) const { + if (DstOp.getLLTTy(*getMRI()).isVector()) { + if (SrcOps[0].getLLTTy(*getMRI()).isVector()) + return TargetOpcode::G_CONCAT_VECTORS; + return TargetOpcode::G_BUILD_VECTOR; + } + + return TargetOpcode::G_MERGE_VALUES; } MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res, @@ -664,6 +696,9 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + if (TmpVec[0].getLLTTy(*getMRI()).getSizeInBits() == + Res.getLLTTy(*getMRI()).getElementType().getSizeInBits()) + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec); } @@ -752,9 +787,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, return buildInstr(TargetOpcode::G_TRUNC, Res, Op); } -MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res, - const SrcOp &Op, - Optional<unsigned> Flags) { +MachineInstrBuilder +MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op, + std::optional<unsigned> Flags) { return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags); } @@ -769,16 +804,15 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, - Optional<unsigned> Flags) { + std::optional<unsigned> Flags) { return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } -MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, - const SrcOp &Tst, - const SrcOp &Op0, - const SrcOp &Op1, - Optional<unsigned> Flags) { +MachineInstrBuilder +MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, + const SrcOp &Op0, const SrcOp &Op1, + std::optional<unsigned> Flags) { return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } @@ -1019,10 +1053,10 @@ void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy, #endif } -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, - ArrayRef<DstOp> DstOps, - ArrayRef<SrcOp> SrcOps, - Optional<unsigned> Flags) { +MachineInstrBuilder +MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, + ArrayRef<SrcOp> SrcOps, + std::optional<unsigned> Flags) { switch (Opc) { default: break; @@ -1150,7 +1184,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, break; } case TargetOpcode::G_MERGE_VALUES: { - assert(!SrcOps.empty() && "invalid trivial sequence"); + assert(SrcOps.size() >= 2 && "invalid trivial sequence"); assert(DstOps.size() == 1 && "Invalid Dst"); assert(llvm::all_of(SrcOps, [&, this](const SrcOp &Op) { @@ -1162,13 +1196,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input operands do not cover output register"); - if (SrcOps.size() == 1) - return buildCast(DstOps[0], SrcOps[0]); - if 
(DstOps[0].getLLTTy(*getMRI()).isVector()) { - if (SrcOps[0].getLLTTy(*getMRI()).isVector()) - return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); - return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); - } + assert(!DstOps[0].getLLTTy(*getMRI()).isVector() && + "vectors should be built with G_CONCAT_VECTOR or G_BUILD_VECTOR"); break; } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { @@ -1228,9 +1257,6 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()); }) && "type mismatch in input list"); - if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == - DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits()) - return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); break; } case TargetOpcode::G_CONCAT_VECTORS: { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index bce850ee212c..080f3ca540f2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -153,8 +153,7 @@ bool RegBankSelect::repairReg( if (MO.isDef()) std::swap(Src, Dst); - assert((RepairPt.getNumInsertPoints() == 1 || - Register::isPhysicalRegister(Dst)) && + assert((RepairPt.getNumInsertPoints() == 1 || Dst.isPhysical()) && "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right @@ -398,7 +397,7 @@ void RegBankSelect::tryAvoidingSplit( // Check if this is a physical or virtual register. Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { // We are going to split every outgoing edges. // Check that this is possible. // FIXME: The machine representation is currently broken @@ -458,6 +457,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n"); return Cost; } + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); // Moreover, to realize this mapping, the register bank of each operand must // match this mapping. In other words, we may need to locally reassign the @@ -471,6 +471,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( Register Reg = MO.getReg(); if (!Reg) continue; + LLT Ty = MRI.getType(Reg); + if (!Ty.isValid()) + continue; + LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n'); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); @@ -603,6 +607,9 @@ bool RegBankSelect::applyMapping( MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank); break; case RepairingPlacement::Insert: + // Don't insert additional instruction for debug instruction. + if (MI.isDebugInstr()) + break; OpdMapper.createVRegs(OpIdx); if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx))) return false; @@ -667,31 +674,7 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { return applyMapping(MI, *BestMapping, RepairPts); } -bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { - // If the ISel pipeline failed, do not bother running that pass. 
- if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - - LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); - const Function &F = MF.getFunction(); - Mode SaveOptMode = OptMode; - if (F.hasOptNone()) - OptMode = Mode::Fast; - init(MF); - -#ifndef NDEBUG - // Check that our input is fully legal: we require the function to have the - // Legalized property, so it should be. - // FIXME: This should be in the MachineVerifier. - if (!DisableGISelLegalityCheck) - if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { - reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", - "instruction is not legal", *MI); - return false; - } -#endif - +bool RegBankSelect::assignRegisterBanks(MachineFunction &MF) { // Walk the function and assign register banks to all operands. // Use a RPOT to make sure all registers are assigned before we choose // the best mapping of the current instruction. @@ -716,10 +699,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { if (MI.isInlineAsm()) continue; - // Ignore debug info. - if (MI.isDebugInstr()) - continue; - // Ignore IMPLICIT_DEF which must have a regclass. if (MI.isImplicitDef()) continue; @@ -732,6 +711,42 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { } } + return true; +} + +bool RegBankSelect::checkFunctionIsLegal(MachineFunction &MF) const { +#ifndef NDEBUG + if (!DisableGISelLegalityCheck) { + if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { + reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", + "instruction is not legal", *MI); + return false; + } + } +#endif + return true; +} + +bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); + const Function &F = MF.getFunction(); + Mode SaveOptMode = OptMode; + if (F.hasOptNone()) + OptMode = Mode::Fast; + init(MF); + +#ifndef NDEBUG + if (!checkFunctionIsLegal(MF)) + return false; +#endif + + assignRegisterBanks(MF); + OptMode = SaveOptMode; return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 013c8700e8ae..07448548c295 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -12,7 +12,7 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" @@ -32,6 +32,8 @@ #include "llvm/IR/Constants.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SizeOpts.h" +#include <numeric> +#include <optional> #define DEBUG_TYPE "globalisel-utils" @@ -55,7 +57,7 @@ Register llvm::constrainOperandRegClass( const TargetRegisterClass &RegClass, MachineOperand &RegMO) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. 
- assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); + assert(Reg.isVirtual() && "PhysReg not implemented"); // Save the old register class to check whether // the change notifications will be required. @@ -107,7 +109,7 @@ Register llvm::constrainOperandRegClass( MachineOperand &RegMO, unsigned OpIdx) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); + assert(Reg.isVirtual() && "PhysReg not implemented"); const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF); // Some of the target independent instructions, like COPY, may not impose any @@ -169,7 +171,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, Register Reg = MO.getReg(); // Physical registers don't need to be constrained. - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) continue; // Register operands with a value of 0 (e.g. predicate operands) don't need @@ -233,7 +235,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, continue; Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg)) + if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg)) return false; } return true; @@ -283,31 +285,31 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional<APInt> llvm::getIConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { - Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( +std::optional<APInt> llvm::getIConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { + std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( VReg, MRI, /*LookThroughInstrs*/ false); assert((!ValAndVReg || ValAndVReg->VReg == VReg) && "Value found while looking through instrs"); if (!ValAndVReg) - return None; + return std::nullopt; return ValAndVReg->Value; } -Optional<int64_t> +std::optional<int64_t> llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) { - Optional<APInt> Val = getIConstantVRegVal(VReg, MRI); + std::optional<APInt> Val = getIConstantVRegVal(VReg, MRI); if (Val && Val->getBitWidth() <= 64) return Val->getSExtValue(); - return None; + return std::nullopt; } namespace { typedef std::function<bool(const MachineInstr *)> IsOpcodeFn; -typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn; +typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn; -Optional<ValueAndVReg> getConstantVRegValWithLookThrough( +std::optional<ValueAndVReg> getConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode, GetAPCstFn getAPCstValue, bool LookThroughInstrs = true, bool LookThroughAnyExt = false) { @@ -319,8 +321,8 @@ Optional<ValueAndVReg> getConstantVRegValWithLookThrough( switch (MI->getOpcode()) { case TargetOpcode::G_ANYEXT: if (!LookThroughAnyExt) - return None; - LLVM_FALLTHROUGH; + return std::nullopt; + [[fallthrough]]; case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: @@ -331,22 +333,22 @@ Optional<ValueAndVReg> getConstantVRegValWithLookThrough( break; case TargetOpcode::COPY: VReg = MI->getOperand(1).getReg(); - if (Register::isPhysicalRegister(VReg)) - return None; + if (VReg.isPhysical()) + return std::nullopt; break; case TargetOpcode::G_INTTOPTR: VReg = MI->getOperand(1).getReg(); break; default: - return None; + return std::nullopt; } } if (!MI || 
!IsConstantOpcode(MI)) - return None; + return std::nullopt; - Optional<APInt> MaybeVal = getAPCstValue(MI); + std::optional<APInt> MaybeVal = getAPCstValue(MI); if (!MaybeVal) - return None; + return std::nullopt; APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val(); @@ -386,31 +388,31 @@ bool isAnyConstant(const MachineInstr *MI) { return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT; } -Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) { +std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) { const MachineOperand &CstVal = MI->getOperand(1); if (CstVal.isCImm()) return CstVal.getCImm()->getValue(); - return None; + return std::nullopt; } -Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) { +std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) { const MachineOperand &CstVal = MI->getOperand(1); if (CstVal.isCImm()) return CstVal.getCImm()->getValue(); if (CstVal.isFPImm()) return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); - return None; + return std::nullopt; } } // end anonymous namespace -Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough( +std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant, getCImmAsAPInt, LookThroughInstrs); } -Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough( +std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, bool LookThroughAnyExt) { return getConstantVRegValWithLookThrough( @@ -418,12 +420,12 @@ Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough( LookThroughAnyExt); } -Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough( +std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { auto Reg = getConstantVRegValWithLookThrough( VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs); if (!Reg) - return None; + return std::nullopt; return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(), Reg->VReg}; } @@ -436,13 +438,13 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { return MI->getOperand(1).getFPImm(); } -Optional<DefinitionAndSourceRegister> +std::optional<DefinitionAndSourceRegister> llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { Register DefSrcReg = Reg; auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) - return None; + return std::nullopt; unsigned Opc = DefMI->getOpcode(); while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) { Register SrcReg = DefMI->getOperand(1).getReg(); @@ -458,14 +460,14 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { MachineInstr *llvm::getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { - Optional<DefinitionAndSourceRegister> DefSrcReg = + std::optional<DefinitionAndSourceRegister> DefSrcReg = getDefSrcRegIgnoringCopies(Reg, MRI); return DefSrcReg ? 
DefSrcReg->MI : nullptr; } Register llvm::getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { - Optional<DefinitionAndSourceRegister> DefSrcReg = + std::optional<DefinitionAndSourceRegister> DefSrcReg = getDefSrcRegIgnoringCopies(Reg, MRI); return DefSrcReg ? DefSrcReg->Reg : Register(); } @@ -489,16 +491,17 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { return APF; } -Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI) { +std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, + const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI) { auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false); if (!MaybeOp2Cst) - return None; + return std::nullopt; auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false); if (!MaybeOp1Cst) - return None; + return std::nullopt; const APInt &C1 = MaybeOp1Cst->Value; const APInt &C2 = MaybeOp2Cst->Value; @@ -550,19 +553,19 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, return APIntOps::umax(C1, C2); } - return None; + return std::nullopt; } -Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI) { +std::optional<APFloat> +llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, + const Register Op2, const MachineRegisterInfo &MRI) { const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI); if (!Op2Cst) - return None; + return std::nullopt; const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI); if (!Op1Cst) - return None; + return std::nullopt; APFloat C1 = Op1Cst->getValueAPF(); const APFloat &C2 = Op2Cst->getValueAPF(); @@ -604,7 +607,7 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, break; } - return None; + return std::nullopt; } SmallVector<APInt> @@ -656,6 +659,20 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, switch (DefMI->getOpcode()) { default: break; + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FREM: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + if (SNaN) + return true; + + // TODO: Need isKnownNeverInfinity + return false; case TargetOpcode::G_FMINNUM_IEEE: case TargetOpcode::G_FMAXNUM_IEEE: { if (SNaN) @@ -742,9 +759,9 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, return LiveIn; } -Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, - uint64_t Imm, - const MachineRegisterInfo &MRI) { +std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, + const Register Op1, uint64_t Imm, + const MachineRegisterInfo &MRI) { auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI); if (MaybeOp1Cst) { switch (Opcode) { @@ -756,12 +773,12 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, } } } - return None; + return std::nullopt; } -Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, - Register Src, - const MachineRegisterInfo &MRI) { +std::optional<APFloat> +llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src, + const MachineRegisterInfo &MRI) { assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP); if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) { APFloat DstVal(getFltSemanticForLLT(DstTy)); @@ 
-769,30 +786,30 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, APFloat::rmNearestTiesToEven); return DstVal; } - return None; + return std::nullopt; } -Optional<SmallVector<unsigned>> +std::optional<SmallVector<unsigned>> llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { LLT Ty = MRI.getType(Src); SmallVector<unsigned> FoldedCTLZs; - auto tryFoldScalar = [&](Register R) -> Optional<unsigned> { + auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> { auto MaybeCst = getIConstantVRegVal(R, MRI); if (!MaybeCst) - return None; + return std::nullopt; return MaybeCst->countLeadingZeros(); }; if (Ty.isVector()) { // Try to constant fold each element. auto *BV = getOpcodeDef<GBuildVector>(Src, MRI); if (!BV) - return None; + return std::nullopt; for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) { if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) { FoldedCTLZs.emplace_back(*MaybeFold); continue; } - return None; + return std::nullopt; } return FoldedCTLZs; } @@ -800,12 +817,12 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { FoldedCTLZs.emplace_back(*MaybeCst); return FoldedCTLZs; } - return None; + return std::nullopt; } bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, GISelKnownBits *KB) { - Optional<DefinitionAndSourceRegister> DefSrcReg = + std::optional<DefinitionAndSourceRegister> DefSrcReg = getDefSrcRegIgnoringCopies(Reg, MRI); if (!DefSrcReg) return false; @@ -879,12 +896,6 @@ void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } -static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) { - unsigned Mul = OrigSize * TargetSize; - unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize); - return Mul / GCDSize; -} - LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { const unsigned OrigSize = OrigTy.getSizeInBits(); const unsigned TargetSize = TargetTy.getSizeInBits(); @@ -899,8 +910,8 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { const LLT TargetElt = TargetTy.getElementType(); if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); + int GCDElts = + std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements()); // Prefer the original element type. ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements(); return LLT::vector(Mul.divideCoefficientBy(GCDElts), @@ -911,16 +922,16 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { return OrigTy; } - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + unsigned LCMSize = std::lcm(OrigSize, TargetSize); return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); } if (TargetTy.isVector()) { - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + unsigned LCMSize = std::lcm(OrigSize, TargetSize); return LLT::fixed_vector(LCMSize / OrigSize, OrigTy); } - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + unsigned LCMSize = std::lcm(OrigSize, TargetSize); // Preserve pointer types. 
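// The removed getLCMSize() helper computed OrigSize * TargetSize / gcd, which
// is exactly std::lcm from <numeric> (the include added earlier in this file's
// changes). A quick stand-alone check with sizes legalization commonly sees:
#include <numeric>
static_assert(std::lcm(48u, 32u) == 96u, "48-bit vs 32-bit");
static_assert(std::lcm(64u, 64u) == 64u,
              "equal sizes: LCMSize == OrigSize, original (pointer) type kept");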
if (LCMSize == OrigSize) @@ -958,8 +969,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { if (TargetTy.isVector()) { LLT TargetElt = TargetTy.getElementType(); if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCD = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); + int GCD = std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements()); return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt); } } else { @@ -968,7 +978,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { return OrigElt; } - unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + unsigned GCD = std::gcd(OrigSize, TargetSize); if (GCD == OrigElt.getSizeInBits()) return OrigElt; @@ -986,11 +996,11 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { return OrigTy; } - unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + unsigned GCD = std::gcd(OrigSize, TargetSize); return LLT::scalar(GCD); } -Optional<int> llvm::getSplatIndex(MachineInstr &MI) { +std::optional<int> llvm::getSplatIndex(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && "Only G_SHUFFLE_VECTOR can have a splat index!"); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); @@ -1006,7 +1016,7 @@ Optional<int> llvm::getSplatIndex(MachineInstr &MI) { int SplatValue = *FirstDefinedIdx; if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) - return None; + return std::nullopt; return SplatValue; } @@ -1018,36 +1028,41 @@ static bool isBuildVectorOp(unsigned Opcode) { namespace { -Optional<ValueAndVReg> getAnyConstantSplat(Register VReg, - const MachineRegisterInfo &MRI, - bool AllowUndef) { +std::optional<ValueAndVReg> getAnyConstantSplat(Register VReg, + const MachineRegisterInfo &MRI, + bool AllowUndef) { MachineInstr *MI = getDefIgnoringCopies(VReg, MRI); if (!MI) - return None; + return std::nullopt; - if (!isBuildVectorOp(MI->getOpcode())) - return None; + bool isConcatVectorsOp = MI->getOpcode() == TargetOpcode::G_CONCAT_VECTORS; + if (!isBuildVectorOp(MI->getOpcode()) && !isConcatVectorsOp) + return std::nullopt; - Optional<ValueAndVReg> SplatValAndReg = None; + std::optional<ValueAndVReg> SplatValAndReg; for (MachineOperand &Op : MI->uses()) { Register Element = Op.getReg(); + // If we have a G_CONCAT_VECTOR, we recursively look into the + // vectors that we're concatenating to see if they're splats. auto ElementValAndReg = - getAnyConstantVRegValWithLookThrough(Element, MRI, true, true); + isConcatVectorsOp + ? getAnyConstantSplat(Element, MRI, AllowUndef) + : getAnyConstantVRegValWithLookThrough(Element, MRI, true, true); // If AllowUndef, treat undef as value that will result in a constant splat. if (!ElementValAndReg) { if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element))) continue; - return None; + return std::nullopt; } // Record splat value if (!SplatValAndReg) SplatValAndReg = ElementValAndReg; - // Different constant then the one already recorded, not a constant splat. + // Different constant than the one already recorded, not a constant splat. 
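// Plain C++ mirror (illustrative types, not the in-tree code) of the new
// recursion above: a G_CONCAT_VECTORS is a constant splat iff every piece it
// concatenates is itself a splat of the same value.
#include <optional>
#include <vector>
struct SplatNode {
  std::optional<int> ScalarCst;     // set on leaves (build-vector elements)
  std::vector<SplatNode> ConcatOps; // non-empty for a concat of vectors
};
inline std::optional<int> anyConstantSplat(const SplatNode &N) {
  if (N.ConcatOps.empty())
    return N.ScalarCst;
  std::optional<int> Splat;
  for (const SplatNode &Op : N.ConcatOps) {
    std::optional<int> V = anyConstantSplat(Op); // recurse into the pieces
    if (!V || (Splat && *Splat != *V))
      return std::nullopt;                       // different constant: no splat
    Splat = V;
  }
  return Splat;
}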
if (SplatValAndReg->Value != ElementValAndReg->Value) - return None; + return std::nullopt; } return SplatValAndReg; @@ -1070,44 +1085,45 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, AllowUndef); } -Optional<APInt> llvm::getIConstantSplatVal(const Register Reg, - const MachineRegisterInfo &MRI) { +std::optional<APInt> +llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) { if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) { - Optional<ValueAndVReg> ValAndVReg = + std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); return ValAndVReg->Value; } - return None; + return std::nullopt; } -Optional<APInt> getIConstantSplatVal(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { +std::optional<APInt> +llvm::getIConstantSplatVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI); } -Optional<int64_t> +std::optional<int64_t> llvm::getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI) { if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI); - return None; + return std::nullopt; } -Optional<int64_t> +std::optional<int64_t> llvm::getIConstantSplatSExtVal(const MachineInstr &MI, const MachineRegisterInfo &MRI) { return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI); } -Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg, - const MachineRegisterInfo &MRI, - bool AllowUndef) { +std::optional<FPValueAndVReg> +llvm::getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, + bool AllowUndef) { if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef)) return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); - return None; + return std::nullopt; } bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, @@ -1122,17 +1138,17 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef); } -Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { +std::optional<RegOrConstant> +llvm::getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) { unsigned Opc = MI.getOpcode(); if (!isBuildVectorOp(Opc)) - return None; + return std::nullopt; if (auto Splat = getIConstantSplatSExtVal(MI, MRI)) return RegOrConstant(*Splat); auto Reg = MI.getOperand(1).getReg(); if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()), [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; })) - return None; + return std::nullopt; return RegOrConstant(Reg); } @@ -1192,7 +1208,7 @@ bool llvm::isConstantOrConstantVector(const MachineInstr &MI, return true; } -Optional<APInt> +std::optional<APInt> llvm::isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI) { Register Def = MI.getOperand(0).getReg(); @@ -1200,7 +1216,7 @@ llvm::isConstantOrConstantSplatVector(MachineInstr &MI, return C->Value; auto MaybeCst = getIConstantSplatSExtVal(MI, MRI); if (!MaybeCst) - return None; + return std::nullopt; const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits(); return APInt(ScalarSize, *MaybeCst, true); } @@ -1283,6 +1299,18 @@ bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, llvm_unreachable("Invalid boolean contents"); } +bool llvm::isConstFalseVal(const TargetLowering &TLI, int64_t Val, + 
bool IsVector, bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + return ~Val & 0x1; + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return Val == 0; + } + llvm_unreachable("Invalid boolean contents"); +} + int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP) { switch (TLI.getBooleanContents(IsVector, IsFP)) { @@ -1335,3 +1363,22 @@ void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver) { return eraseInstrs({&MI}, MRI, LocObserver); } + +void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) { + for (auto &Def : MI.defs()) { + assert(Def.isReg() && "Must be a reg"); + + SmallVector<MachineOperand *, 16> DbgUsers; + for (auto &MOUse : MRI.use_operands(Def.getReg())) { + MachineInstr *DbgValue = MOUse.getParent(); + // Ignore partially formed DBG_VALUEs. + if (DbgValue->isNonListDebugValue() && DbgValue->getNumOperands() == 4) { + DbgUsers.push_back(&MOUse); + } + } + + if (!DbgUsers.empty()) { + salvageDebugInfoForDbgValue(MRI, MI, DbgUsers); + } + } +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp index f5833d3b9086..2ccf2def48f8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp @@ -62,6 +62,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -181,7 +182,7 @@ namespace { void collectUsedGlobalVariables(Module &M, StringRef Name); /// Keep track of the GlobalVariable that must not be merged away - SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables; public: static char ID; // Pass identification, replacement for typeid. @@ -224,8 +225,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, llvm::stable_sort( Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { // We don't support scalable global variables. - return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() < - DL.getTypeAllocSize(GV2->getValueType()).getFixedSize(); + return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() < + DL.getTypeAllocSize(GV2->getValueType()).getFixedValue(); }); // If we want to just blindly group all globals together, do so. @@ -619,9 +620,8 @@ bool GlobalMerge::doInitialization(Module &M) { LLVM_DEBUG({ dbgs() << "Number of GV that must be kept: " << MustKeepGlobalVariables.size() << "\n"; - for (auto KeptGV = MustKeepGlobalVariables.begin(); - KeptGV != MustKeepGlobalVariables.end(); KeptGV++) - dbgs() << "Kept: " << **KeptGV << "\n"; + for (const GlobalVariable *KeptGV : MustKeepGlobalVariables) + dbgs() << "Kept: " << *KeptGV << "\n"; }); // Grab all non-const globals. 
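// Likely motivation for the SmallPtrSet -> SmallSetVector switch earlier in
// this file (an assumption, not stated in the patch): a SetVector deduplicates
// like a set but iterates in insertion order, so the "Kept:" dump above and any
// other walk over MustKeepGlobalVariables is deterministic. Minimal usage
// sketch with ints standing in for GlobalVariable pointers:
#include "llvm/ADT/SetVector.h"
inline bool insertionOrderIsKept() {
  llvm::SmallSetVector<int, 4> S;
  S.insert(3);
  S.insert(1);
  S.insert(3); // duplicate, ignored
  return S.size() == 2 && S[0] == 3 && S[1] == 1;
}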
for (auto &GV : M.globals()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp index da6ec76bd770..fa493af0eea7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -26,8 +26,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -98,11 +96,11 @@ class ImplicitNullChecks : public MachineFunctionPass { /// If non-None, then an instruction in \p Insts that also must be /// hoisted. - Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence; + std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence; /*implicit*/ DependenceResult( bool CanReorder, - Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence) + std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence) : CanReorder(CanReorder), PotentialDependence(PotentialDependence) { assert((!PotentialDependence || CanReorder) && "!CanReorder && PotentialDependence.hasValue() not allowed!"); @@ -255,18 +253,18 @@ ImplicitNullChecks::computeDependence(const MachineInstr *MI, assert(llvm::all_of(Block, canHandle) && "Check this first!"); assert(!is_contained(Block, MI) && "Block must be exclusive of MI!"); - Optional<ArrayRef<MachineInstr *>::iterator> Dep; + std::optional<ArrayRef<MachineInstr *>::iterator> Dep; for (auto I = Block.begin(), E = Block.end(); I != E; ++I) { if (canReorder(*I, MI)) continue; - if (Dep == None) { + if (Dep == std::nullopt) { // Found one possible dependency, keep track of it. Dep = I; } else { // We found two dependencies, so bail out. - return {false, None}; + return {false, std::nullopt}; } } @@ -805,7 +803,7 @@ void ImplicitNullChecks::rewriteNullChecks( // Insert an *unconditional* branch to not-null successor - we expect // block placement to remove fallthroughs later. 
TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, - /*Cond=*/None, DL); + /*Cond=*/std::nullopt, DL); NumImplicitNullChecks++; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index 5be98e114673..012892166ae7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -40,6 +40,7 @@ #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" +#include <optional> using namespace llvm; @@ -90,7 +91,7 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { return false; TLI = STI.getTargetLowering(); - Optional<DomTreeUpdater> DTU; + std::optional<DomTreeUpdater> DTU; if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); @@ -198,7 +199,7 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { CommonITy = ITy; } - auto GetSwitchValue = [DL, CommonITy](IndirectBrInst *IBr) { + auto GetSwitchValue = [CommonITy](IndirectBrInst *IBr) { return CastInst::CreatePointerCast( IBr->getAddress(), CommonITy, Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp index 3ea1d6c7f1ef..cf4fff878ad1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -104,7 +103,7 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { // Map from pair of (StackSlot and Original VNI) to a set of spills which // have the same stackslot and have equal values defined by Original VNI. - // These spills are mergeable and are hoist candiates. + // These spills are mergeable and are hoist candidates. using MergeableSpillsMap = MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>; MergeableSpillsMap MergeableSpills; @@ -270,7 +269,7 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) { static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) { for (const MachineOperand &MO : MI.operands()) - if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) + if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual()) LIS.getInterval(MO.getReg()); } @@ -281,13 +280,28 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { Register Reg = Edit->getReg(); // A snippet is a tiny live range with only a single instruction using it - // besides copies to/from Reg or spills/fills. We accept: + // besides copies to/from Reg or spills/fills. + // Exception is done for statepoint instructions which will fold fills + // into their operands. + // We accept: // // %snip = COPY %Reg / FILL fi# // %snip = USE %snip + // %snip = STATEPOINT %snip in var arg area // %Reg = COPY %snip / SPILL %snip, fi# // - if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) + if (!LIS.intervalIsInOneMBB(SnipLI)) + return false; + + // Number of defs should not exceed 2 not accounting defs coming from + // statepoint instructions. 
+ unsigned NumValNums = SnipLI.getNumValNums(); + for (auto *VNI : SnipLI.vnis()) { + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + if (MI->getOpcode() == TargetOpcode::STATEPOINT) + --NumValNums; + } + if (NumValNums > 2) return false; MachineInstr *UseMI = nullptr; @@ -312,6 +326,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) continue; + if (StatepointOpers::isFoldableReg(&MI, SnipLI.reg())) + continue; + // Allow a single additional instruction. if (UseMI && &MI != UseMI) return false; @@ -417,7 +434,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstrSpan MIS(MII, MBB); // Insert spill without kill flag immediately after def. TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, - MRI.getRegClass(SrcReg), &TRI); + MRI.getRegClass(SrcReg), &TRI, Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); @@ -894,7 +911,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, if (!MO->isReg()) continue; Register Reg = MO->getReg(); - if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { + if (!Reg || Reg.isVirtual() || MRI.isReserved(Reg)) { continue; } // Skip non-Defs, including undef uses and internal reads. @@ -993,7 +1010,7 @@ void InlineSpiller::insertReload(Register NewVReg, MachineInstrSpan MIS(MI, &MBB); TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + MRI.getRegClass(NewVReg), &TRI, Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); @@ -1030,7 +1047,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill, if (IsRealSpill) TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + MRI.getRegClass(NewVReg), &TRI, Register()); else // Don't spill undef value. // Anything works for undef, in particular keeping the memory @@ -1596,7 +1613,7 @@ void HoistSpillHelper::hoistAllSpills() { MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB); MachineInstrSpan MIS(MII, BB); TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot, - MRI.getRegClass(LiveReg), &TRI); + MRI.getRegClass(LiveReg), &TRI, Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); @@ -1613,7 +1630,7 @@ void HoistSpillHelper::hoistAllSpills() { RMEnt->removeOperand(i - 1); } } - Edit.eliminateDeadDefs(SpillsToRm, None); + Edit.eliminateDeadDefs(SpillsToRm, std::nullopt); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index a0f304659bca..0d36badfa10f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -887,7 +887,7 @@ public: ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0), ConstantInt::get(Type::getInt32Ty(LI->getContext()), i), }; - int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2)); + int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, ArrayRef(Idx, 2)); Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? 
LI : nullptr); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp index 808a79d9792a..61920a0e04ab 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -430,7 +430,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); break; } - case Intrinsic::flt_rounds: + case Intrinsic::get_rounding: // Lower to "round to the nearest" if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp index 23220872b532..f1953c363b59 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp @@ -151,7 +151,7 @@ bool JMCInstrumenter::runOnModule(Module &M) { bool IsELF = ModuleTriple.isOSBinFormatELF(); assert((IsELF || IsMSVC) && "Unsupported triple for JMC"); bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86; - const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc"; + const char *const FlagSymbolSection = IsELF ? ".data.just.my.code" : ".msvcjmc"; GlobalValue *CheckFunction = nullptr; DenseMap<DISubprogram *, Constant *> SavedFlags(8); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 191596dbf53e..ba417322d4f6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -127,6 +127,7 @@ #include "InstrRefBasedImpl.h" #include "LiveDebugValues.h" +#include <optional> using namespace llvm; using namespace LiveDebugValues; @@ -155,6 +156,8 @@ static cl::opt<unsigned> cl::desc("livedebugvalues-stack-ws-limit"), cl::init(250)); +DbgOpID DbgOpID::UndefID = DbgOpID(0xffffffff); + /// Tracker for converting machine value locations and variable values into /// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs /// specifying block live-in locations and transfers within blocks. @@ -191,9 +194,25 @@ public: SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert. }; - struct LocAndProperties { - LocIdx Loc; + /// Stores the resolved operands (machine locations and constants) and + /// qualifying meta-information needed to construct a concrete DBG_VALUE-like + /// instruction. + struct ResolvedDbgValue { + SmallVector<ResolvedDbgOp> Ops; DbgValueProperties Properties; + + ResolvedDbgValue(SmallVectorImpl<ResolvedDbgOp> &Ops, + DbgValueProperties Properties) + : Ops(Ops.begin(), Ops.end()), Properties(Properties) {} + + /// Returns all the LocIdx values used in this struct, in the order in which + /// they appear as operands in the debug value; may contain duplicates. + auto loc_indices() const { + return map_range( + make_filter_range( + Ops, [](const ResolvedDbgOp &Op) { return !Op.IsConst; }), + [](const ResolvedDbgOp &Op) { return Op.Loc; }); + } }; /// Collection of transfers (DBG_VALUEs) to be inserted. @@ -213,7 +232,7 @@ public: /// Map from DebugVariable to it's current location and qualifying meta /// information. 
To be used in conjunction with ActiveMLocs to construct /// enough information for the DBG_VALUEs for a particular LocIdx. - DenseMap<DebugVariable, LocAndProperties> ActiveVLocs; + DenseMap<DebugVariable, ResolvedDbgValue> ActiveVLocs; /// Temporary cache of DBG_VALUEs to be entered into the Transfers collection. SmallVector<MachineInstr *, 4> PendingDbgValues; @@ -223,11 +242,15 @@ public: /// defined in this block. struct UseBeforeDef { /// Value of this variable, def'd in block. - ValueIDNum ID; + SmallVector<DbgOp> Values; /// Identity of this variable. DebugVariable Var; /// Additional variable properties. DbgValueProperties Properties; + UseBeforeDef(ArrayRef<DbgOp> Values, const DebugVariable &Var, + const DbgValueProperties &Properties) + : Values(Values.begin(), Values.end()), Var(Var), + Properties(Properties) {} }; /// Map from instruction index (within the block) to the set of UseBeforeDefs @@ -252,6 +275,153 @@ public: ShouldEmitDebugEntryValues = TM.Options.ShouldEmitDebugEntryValues(); } + bool isCalleeSaved(LocIdx L) const { + unsigned Reg = MTracker->LocIdxToLocID[L]; + if (Reg >= MTracker->NumRegs) + return false; + for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI) + if (CalleeSavedRegs.test(*RAI)) + return true; + return false; + }; + + // An estimate of the expected lifespan of values at a machine location, with + // a greater value corresponding to a longer expected lifespan, i.e. spill + // slots generally live longer than callee-saved registers which generally + // live longer than non-callee-saved registers. The minimum value of 0 + // corresponds to an illegal location that cannot have a "lifespan" at all. + enum class LocationQuality : unsigned char { + Illegal = 0, + Register, + CalleeSavedRegister, + SpillSlot, + Best = SpillSlot + }; + + class LocationAndQuality { + unsigned Location : 24; + unsigned Quality : 8; + + public: + LocationAndQuality() : Location(0), Quality(0) {} + LocationAndQuality(LocIdx L, LocationQuality Q) + : Location(L.asU64()), Quality(static_cast<unsigned>(Q)) {} + LocIdx getLoc() const { + if (!Quality) + return LocIdx::MakeIllegalLoc(); + return LocIdx(Location); + } + LocationQuality getQuality() const { return LocationQuality(Quality); } + bool isIllegal() const { return !Quality; } + bool isBest() const { return getQuality() == LocationQuality::Best; } + }; + + // Returns the LocationQuality for the location L iff the quality of L is + // is strictly greater than the provided minimum quality. + std::optional<LocationQuality> + getLocQualityIfBetter(LocIdx L, LocationQuality Min) const { + if (L.isIllegal()) + return std::nullopt; + if (Min >= LocationQuality::SpillSlot) + return std::nullopt; + if (MTracker->isSpill(L)) + return LocationQuality::SpillSlot; + if (Min >= LocationQuality::CalleeSavedRegister) + return std::nullopt; + if (isCalleeSaved(L)) + return LocationQuality::CalleeSavedRegister; + if (Min >= LocationQuality::Register) + return std::nullopt; + return LocationQuality::Register; + } + + /// For a variable \p Var with the live-in value \p Value, attempts to resolve + /// the DbgValue to a concrete DBG_VALUE, emitting that value and loading the + /// tracking information to track Var throughout the block. + /// \p ValueToLoc is a map containing the best known location for every + /// ValueIDNum that Value may use. + /// \p MBB is the basic block that we are loading the live-in value for. 
+ /// \p DbgOpStore is the map containing the DbgOpID->DbgOp mapping needed to + /// determine the values used by Value. + void loadVarInloc(MachineBasicBlock &MBB, DbgOpIDMap &DbgOpStore, + const DenseMap<ValueIDNum, LocationAndQuality> &ValueToLoc, + DebugVariable Var, DbgValue Value) { + SmallVector<DbgOp> DbgOps; + SmallVector<ResolvedDbgOp> ResolvedDbgOps; + bool IsValueValid = true; + unsigned LastUseBeforeDef = 0; + + // If every value used by the incoming DbgValue is available at block + // entry, ResolvedDbgOps will contain the machine locations/constants for + // those values and will be used to emit a debug location. + // If one or more values are not yet available, but will all be defined in + // this block, then LastUseBeforeDef will track the instruction index in + // this BB at which the last of those values is defined, DbgOps will + // contain the values that we will emit when we reach that instruction. + // If one or more values are undef or not available throughout this block, + // and we can't recover as an entry value, we set IsValueValid=false and + // skip this variable. + for (DbgOpID ID : Value.getDbgOpIDs()) { + DbgOp Op = DbgOpStore.find(ID); + DbgOps.push_back(Op); + if (ID.isUndef()) { + IsValueValid = false; + break; + } + if (ID.isConst()) { + ResolvedDbgOps.push_back(Op.MO); + continue; + } + + // If the value has no location, we can't make a variable location. + const ValueIDNum &Num = Op.ID; + auto ValuesPreferredLoc = ValueToLoc.find(Num); + if (ValuesPreferredLoc->second.isIllegal()) { + // If it's a def that occurs in this block, register it as a + // use-before-def to be resolved as we step through the block. + // Continue processing values so that we add any other UseBeforeDef + // entries needed for later. + if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) { + LastUseBeforeDef = std::max(LastUseBeforeDef, + static_cast<unsigned>(Num.getInst())); + continue; + } + recoverAsEntryValue(Var, Value.Properties, Num); + IsValueValid = false; + break; + } + + // Defer modifying ActiveVLocs until after we've confirmed we have a + // live range. + LocIdx M = ValuesPreferredLoc->second.getLoc(); + ResolvedDbgOps.push_back(M); + } + + // If we cannot produce a valid value for the LiveIn value within this + // block, skip this variable. + if (!IsValueValid) + return; + + // Add UseBeforeDef entry for the last value to be defined in this block. + if (LastUseBeforeDef) { + addUseBeforeDef(Var, Value.Properties, DbgOps, + LastUseBeforeDef); + return; + } + + // The LiveIn value is available at block entry, begin tracking and record + // the transfer. + for (const ResolvedDbgOp &Op : ResolvedDbgOps) + if (!Op.IsConst) + ActiveMLocs[Op.Loc].insert(Var); + auto NewValue = ResolvedDbgValue{ResolvedDbgOps, Value.Properties}; + auto Result = ActiveVLocs.insert(std::make_pair(Var, NewValue)); + if (!Result.second) + Result.first->second = NewValue; + PendingDbgValues.push_back( + MTracker->emitLoc(ResolvedDbgOps, Var, Value.Properties)); + } + /// Load object with live-in variable values. \p mlocs contains the live-in /// values in each machine location, while \p vlocs the live-in variable /// values. This method picks variable locations for the live-in variables, @@ -259,7 +429,7 @@ public: /// object fields to track variable locations as we step through the block. /// FIXME: could just examine mloctracker instead of passing in \p mlocs? 
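loadVarInloc above and loadInlocs, whose updated signature follows, both rank candidate machine locations through the new LocationAndQuality/getLocQualityIfBetter helpers so that longer-lived homes for a value win. A standalone sketch of that ranking, with boolean parameters standing in for the MLocTracker spill and callee-saved queries:

#include <optional>

// Mirrors the patch's preference order: higher enumerators are locations
// expected to hold a value for longer.
enum class LocationQuality : unsigned char {
  Illegal = 0,
  Register,
  CalleeSavedRegister,
  SpillSlot,
  Best = SpillSlot
};

// Return the candidate's quality only if it strictly beats the quality of the
// location already chosen (Min). IsSpill/IsCalleeSaved stand in for the
// tracker and register-info queries used in the real code.
std::optional<LocationQuality>
getLocQualityIfBetter(bool IsSpill, bool IsCalleeSaved, LocationQuality Min) {
  if (Min >= LocationQuality::SpillSlot)
    return std::nullopt;
  if (IsSpill)
    return LocationQuality::SpillSlot;
  if (Min >= LocationQuality::CalleeSavedRegister)
    return std::nullopt;
  if (IsCalleeSaved)
    return LocationQuality::CalleeSavedRegister;
  if (Min >= LocationQuality::Register)
    return std::nullopt;
  return LocationQuality::Register;
}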
void - loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, + loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, DbgOpIDMap &DbgOpStore, const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs, unsigned NumLocs) { ActiveMLocs.clear(); @@ -269,24 +439,16 @@ public: UseBeforeDefs.clear(); UseBeforeDefVariables.clear(); - auto isCalleeSaved = [&](LocIdx L) { - unsigned Reg = MTracker->LocIdxToLocID[L]; - if (Reg >= MTracker->NumRegs) - return false; - for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI) - if (CalleeSavedRegs.test(*RAI)) - return true; - return false; - }; - // Map of the preferred location for each value. - DenseMap<ValueIDNum, LocIdx> ValueToLoc; + DenseMap<ValueIDNum, LocationAndQuality> ValueToLoc; // Initialized the preferred-location map with illegal locations, to be // filled in later. for (const auto &VLoc : VLocs) if (VLoc.second.Kind == DbgValue::Def) - ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()}); + for (DbgOpID OpID : VLoc.second.getDbgOpIDs()) + if (!OpID.ID.IsConst) + ValueToLoc.insert({DbgOpStore.find(OpID).ID, LocationAndQuality()}); ActiveMLocs.reserve(VLocs.size()); ActiveVLocs.reserve(VLocs.size()); @@ -297,6 +459,8 @@ public: for (auto Location : MTracker->locations()) { LocIdx Idx = Location.Idx; ValueIDNum &VNum = MLocs[Idx.asU64()]; + if (VNum == ValueIDNum::EmptyValue) + continue; VarLocs.push_back(VNum); // Is there a variable that wants a location for this value? If not, skip. @@ -304,47 +468,18 @@ public: if (VIt == ValueToLoc.end()) continue; - LocIdx CurLoc = VIt->second; - // In order of preference, pick: - // * Callee saved registers, - // * Other registers, - // * Spill slots. - if (CurLoc.isIllegal() || MTracker->isSpill(CurLoc) || - (!isCalleeSaved(CurLoc) && isCalleeSaved(Idx.asU64()))) { - // Insert, or overwrite if insertion failed. - VIt->second = Idx; - } + auto &Previous = VIt->second; + // If this is the first location with that value, pick it. Otherwise, + // consider whether it's a "longer term" location. + std::optional<LocationQuality> ReplacementQuality = + getLocQualityIfBetter(Idx, Previous.getQuality()); + if (ReplacementQuality) + Previous = LocationAndQuality(Idx, *ReplacementQuality); } // Now map variables to their picked LocIdxes. for (const auto &Var : VLocs) { - if (Var.second.Kind == DbgValue::Const) { - PendingDbgValues.push_back( - emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties)); - continue; - } - - // If the value has no location, we can't make a variable location. - const ValueIDNum &Num = Var.second.ID; - auto ValuesPreferredLoc = ValueToLoc.find(Num); - if (ValuesPreferredLoc->second.isIllegal()) { - // If it's a def that occurs in this block, register it as a - // use-before-def to be resolved as we step through the block. 
- if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) - addUseBeforeDef(Var.first, Var.second.Properties, Num); - else - recoverAsEntryValue(Var.first, Var.second.Properties, Num); - continue; - } - - LocIdx M = ValuesPreferredLoc->second; - auto NewValue = LocAndProperties{M, Var.second.Properties}; - auto Result = ActiveVLocs.insert(std::make_pair(Var.first, NewValue)); - if (!Result.second) - Result.first->second = NewValue; - ActiveMLocs[M].insert(Var.first); - PendingDbgValues.push_back( - MTracker->emitLoc(M, Var.first, Var.second.Properties)); + loadVarInloc(MBB, DbgOpStore, ValueToLoc, Var.first, Var.second); } flushDbgValues(MBB.begin(), &MBB); } @@ -352,9 +487,9 @@ public: /// Record that \p Var has value \p ID, a value that becomes available /// later in the function. void addUseBeforeDef(const DebugVariable &Var, - const DbgValueProperties &Properties, ValueIDNum ID) { - UseBeforeDef UBD = {ID, Var, Properties}; - UseBeforeDefs[ID.getInst()].push_back(UBD); + const DbgValueProperties &Properties, + const SmallVectorImpl<DbgOp> &DbgOps, unsigned Inst) { + UseBeforeDefs[Inst].emplace_back(DbgOps, Var, Properties); UseBeforeDefVariables.insert(Var); } @@ -367,22 +502,77 @@ public: if (MIt == UseBeforeDefs.end()) return; + // Map of values to the locations that store them for every value used by + // the variables that may have become available. + SmallDenseMap<ValueIDNum, LocationAndQuality> ValueToLoc; + + // Populate ValueToLoc with illegal default mappings for every value used by + // any UseBeforeDef variables for this instruction. for (auto &Use : MIt->second) { - LocIdx L = Use.ID.getLoc(); + if (!UseBeforeDefVariables.count(Use.Var)) + continue; + + for (DbgOp &Op : Use.Values) { + assert(!Op.isUndef() && "UseBeforeDef erroneously created for a " + "DbgValue with undef values."); + if (Op.IsConst) + continue; + + ValueToLoc.insert({Op.ID, LocationAndQuality()}); + } + } - // If something goes very wrong, we might end up labelling a COPY - // instruction or similar with an instruction number, where it doesn't - // actually define a new value, instead it moves a value. In case this - // happens, discard. - if (MTracker->readMLoc(L) != Use.ID) + // Exit early if we have no DbgValues to produce. + if (ValueToLoc.empty()) + return; + + // Determine the best location for each desired value. + for (auto Location : MTracker->locations()) { + LocIdx Idx = Location.Idx; + ValueIDNum &LocValueID = Location.Value; + + // Is there a variable that wants a location for this value? If not, skip. + auto VIt = ValueToLoc.find(LocValueID); + if (VIt == ValueToLoc.end()) continue; - // If a different debug instruction defined the variable value / location - // since the start of the block, don't materialize this use-before-def. + auto &Previous = VIt->second; + // If this is the first location with that value, pick it. Otherwise, + // consider whether it's a "longer term" location. + std::optional<LocationQuality> ReplacementQuality = + getLocQualityIfBetter(Idx, Previous.getQuality()); + if (ReplacementQuality) + Previous = LocationAndQuality(Idx, *ReplacementQuality); + } + + // Using the map of values to locations, produce a final set of values for + // this variable. 
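The loop that follows, like loadVarInloc earlier, applies an all-or-nothing rule: every value an operand list needs must resolve to a concrete location (or be a constant), otherwise nothing is emitted for the variable. A toy standalone version of that resolution step, with std::map and ints in place of the tracker's DenseMaps and the LocIdx/ValueIDNum types:

#include <map>
#include <optional>
#include <vector>

using Value = int;
using Loc = int;

// Resolve every value an operand list needs. If any value has no known
// location, give up and emit nothing for the variable (the real code also
// lets constants through unchanged and defers block-local use-before-defs).
std::optional<std::vector<Loc>>
resolveOperands(const std::vector<Value> &Needed,
                const std::map<Value, Loc> &ValueToLoc) {
  std::vector<Loc> Ops;
  for (Value V : Needed) {
    auto It = ValueToLoc.find(V);
    if (It == ValueToLoc.end())
      return std::nullopt; // one operand has no location: drop them all
    Ops.push_back(It->second);
  }
  return Ops;
}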
+ for (auto &Use : MIt->second) { if (!UseBeforeDefVariables.count(Use.Var)) continue; - PendingDbgValues.push_back(MTracker->emitLoc(L, Use.Var, Use.Properties)); + SmallVector<ResolvedDbgOp> DbgOps; + + for (DbgOp &Op : Use.Values) { + if (Op.IsConst) { + DbgOps.push_back(Op.MO); + continue; + } + LocIdx NewLoc = ValueToLoc.find(Op.ID)->second.getLoc(); + if (NewLoc.isIllegal()) + break; + DbgOps.push_back(NewLoc); + } + + // If at least one value used by this debug value is no longer available, + // i.e. one of the values was killed before we finished defining all of + // the values used by this variable, discard. + if (DbgOps.size() != Use.Values.size()) + continue; + + // Otherwise, we're good to go. + PendingDbgValues.push_back( + MTracker->emitLoc(DbgOps, Use.Var, Use.Properties)); } flushDbgValues(pos, nullptr); } @@ -440,8 +630,21 @@ public: if (!ShouldEmitDebugEntryValues) return false; + const DIExpression *DIExpr = Prop.DIExpr; + + // We don't currently emit entry values for DBG_VALUE_LISTs. + if (Prop.IsVariadic) { + // If this debug value can be converted to be non-variadic, then do so; + // otherwise give up. + auto NonVariadicExpression = + DIExpression::convertToNonVariadicExpression(DIExpr); + if (!NonVariadicExpression) + return false; + DIExpr = *NonVariadicExpression; + } + // Is the variable appropriate for entry values (i.e., is a parameter). - if (!isEntryValueVariable(Var, Prop.DIExpr)) + if (!isEntryValueVariable(Var, DIExpr)) return false; // Is the value assigned to this variable still the entry value? @@ -450,11 +653,12 @@ public: // Emit a variable location using an entry value expression. DIExpression *NewExpr = - DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue); + DIExpression::prepend(DIExpr, DIExpression::EntryValue); Register Reg = MTracker->LocIdxToLocID[Num.getLoc()]; MachineOperand MO = MachineOperand::CreateReg(Reg, false); - PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect})); + PendingDbgValues.push_back( + emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false})); return true; } @@ -464,62 +668,100 @@ public: MI.getDebugLoc()->getInlinedAt()); DbgValueProperties Properties(MI); - const MachineOperand &MO = MI.getOperand(0); - // Ignore non-register locations, we don't transfer those. - if (!MO.isReg() || MO.getReg() == 0) { + if (MI.isUndefDebugValue() || + all_of(MI.debug_operands(), + [](const MachineOperand &MO) { return !MO.isReg(); })) { auto It = ActiveVLocs.find(Var); if (It != ActiveVLocs.end()) { - ActiveMLocs[It->second.Loc].erase(Var); + for (LocIdx Loc : It->second.loc_indices()) + ActiveMLocs[Loc].erase(Var); ActiveVLocs.erase(It); - } + } // Any use-before-defs no longer apply. UseBeforeDefVariables.erase(Var); return; } - Register Reg = MO.getReg(); - LocIdx NewLoc = MTracker->getRegMLoc(Reg); - redefVar(MI, Properties, NewLoc); + SmallVector<ResolvedDbgOp> NewLocs; + for (const MachineOperand &MO : MI.debug_operands()) { + if (MO.isReg()) { + // Any undef regs have already been filtered out above. + Register Reg = MO.getReg(); + LocIdx NewLoc = MTracker->getRegMLoc(Reg); + NewLocs.push_back(NewLoc); + } else { + NewLocs.push_back(MO); + } + } + + redefVar(MI, Properties, NewLocs); } /// Handle a change in variable location within a block. Terminate the /// variables current location, and record the value it now refers to, so /// that we can detect location transfers later on. 
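redefVar, defined next, keeps two maps in sync: each variable maps to the list of machine locations it currently uses, and each location maps back to the set of variables based on it, so a later clobber of any one location can find every affected variable. A toy model of that bookkeeping, with std::map/std::set and strings in place of DenseMap, DebugVariable and LocIdx:

#include <map>
#include <set>
#include <string>
#include <vector>

struct Tracker {
  std::map<std::string, std::vector<int>> ActiveVLocs; // var -> locations
  std::map<int, std::set<std::string>> ActiveMLocs;    // location -> vars

  void redefVar(const std::string &Var, const std::vector<int> &NewLocs) {
    // Drop the reverse mappings for the variable's previous locations.
    auto It = ActiveVLocs.find(Var);
    if (It != ActiveVLocs.end())
      for (int Loc : It->second)
        ActiveMLocs[Loc].erase(Var);

    if (NewLocs.empty()) { // undef: nothing left to track for this variable
      if (It != ActiveVLocs.end())
        ActiveVLocs.erase(It);
      return;
    }

    for (int Loc : NewLocs)
      ActiveMLocs[Loc].insert(Var);
    ActiveVLocs[Var] = NewLocs;
  }
};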
void redefVar(const MachineInstr &MI, const DbgValueProperties &Properties, - Optional<LocIdx> OptNewLoc) { + SmallVectorImpl<ResolvedDbgOp> &NewLocs) { DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); // Any use-before-defs no longer apply. UseBeforeDefVariables.erase(Var); - // Erase any previous location, + // Erase any previous location. auto It = ActiveVLocs.find(Var); - if (It != ActiveVLocs.end()) - ActiveMLocs[It->second.Loc].erase(Var); + if (It != ActiveVLocs.end()) { + for (LocIdx Loc : It->second.loc_indices()) + ActiveMLocs[Loc].erase(Var); + } // If there _is_ no new location, all we had to do was erase. - if (!OptNewLoc) + if (NewLocs.empty()) { + if (It != ActiveVLocs.end()) + ActiveVLocs.erase(It); return; - LocIdx NewLoc = *OptNewLoc; - - // Check whether our local copy of values-by-location in #VarLocs is out of - // date. Wipe old tracking data for the location if it's been clobbered in - // the meantime. - if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) { - for (const auto &P : ActiveMLocs[NewLoc]) { - ActiveVLocs.erase(P); + } + + SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs; + for (ResolvedDbgOp &Op : NewLocs) { + if (Op.IsConst) + continue; + + LocIdx NewLoc = Op.Loc; + + // Check whether our local copy of values-by-location in #VarLocs is out + // of date. Wipe old tracking data for the location if it's been clobbered + // in the meantime. + if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) { + for (const auto &P : ActiveMLocs[NewLoc]) { + auto LostVLocIt = ActiveVLocs.find(P); + if (LostVLocIt != ActiveVLocs.end()) { + for (LocIdx Loc : LostVLocIt->second.loc_indices()) { + // Every active variable mapping for NewLoc will be cleared, no + // need to track individual variables. + if (Loc == NewLoc) + continue; + LostMLocs.emplace_back(Loc, P); + } + } + ActiveVLocs.erase(P); + } + for (const auto &LostMLoc : LostMLocs) + ActiveMLocs[LostMLoc.first].erase(LostMLoc.second); + LostMLocs.clear(); + It = ActiveVLocs.find(Var); + ActiveMLocs[NewLoc.asU64()].clear(); + VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc); } - ActiveMLocs[NewLoc.asU64()].clear(); - VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc); + + ActiveMLocs[NewLoc].insert(Var); } - ActiveMLocs[NewLoc].insert(Var); if (It == ActiveVLocs.end()) { ActiveVLocs.insert( - std::make_pair(Var, LocAndProperties{NewLoc, Properties})); + std::make_pair(Var, ResolvedDbgValue(NewLocs, Properties))); } else { - It->second.Loc = NewLoc; + It->second.Ops.assign(NewLocs); It->second.Properties = Properties; } } @@ -551,7 +793,7 @@ public: // Examine the remaining variable locations: if we can find the same value // again, we can recover the location. - Optional<LocIdx> NewLoc = None; + std::optional<LocIdx> NewLoc; for (auto Loc : MTracker->locations()) if (Loc.Value == OldValue) NewLoc = Loc.Idx; @@ -570,28 +812,54 @@ public: // Examine all the variables based on this location. DenseSet<DebugVariable> NewMLocs; + // If no new location has been found, every variable that depends on this + // MLoc is dead, so end their existing MLoc->Var mappings as well. + SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs; for (const auto &Var : ActiveMLocIt->second) { auto ActiveVLocIt = ActiveVLocs.find(Var); // Re-state the variable location: if there's no replacement then NewLoc - // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE - // identifying the alternative location will be emitted. 
+ // is std::nullopt and a $noreg DBG_VALUE will be created. Otherwise, a + // DBG_VALUE identifying the alternative location will be emitted. const DbgValueProperties &Properties = ActiveVLocIt->second.Properties; - PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties)); + + // Produce the new list of debug ops - an empty list if no new location + // was found, or the existing list with the substitution MLoc -> NewLoc + // otherwise. + SmallVector<ResolvedDbgOp> DbgOps; + if (NewLoc) { + ResolvedDbgOp OldOp(MLoc); + ResolvedDbgOp NewOp(*NewLoc); + // Insert illegal ops to overwrite afterwards. + DbgOps.insert(DbgOps.begin(), ActiveVLocIt->second.Ops.size(), + ResolvedDbgOp(LocIdx::MakeIllegalLoc())); + replace_copy(ActiveVLocIt->second.Ops, DbgOps.begin(), OldOp, NewOp); + } + + PendingDbgValues.push_back(MTracker->emitLoc(DbgOps, Var, Properties)); // Update machine locations <=> variable locations maps. Defer updating - // ActiveMLocs to avoid invalidaing the ActiveMLocIt iterator. + // ActiveMLocs to avoid invalidating the ActiveMLocIt iterator. if (!NewLoc) { + for (LocIdx Loc : ActiveVLocIt->second.loc_indices()) { + if (Loc != MLoc) + LostMLocs.emplace_back(Loc, Var); + } ActiveVLocs.erase(ActiveVLocIt); } else { - ActiveVLocIt->second.Loc = *NewLoc; + ActiveVLocIt->second.Ops = DbgOps; NewMLocs.insert(Var); } } - // Commit any deferred ActiveMLoc changes. - if (!NewMLocs.empty()) - for (auto &Var : NewMLocs) - ActiveMLocs[*NewLoc].insert(Var); + // Remove variables from ActiveMLocs if they no longer use any other MLocs + // due to being killed by this clobber. + for (auto &LocVarIt : LostMLocs) { + auto LostMLocIt = ActiveMLocs.find(LocVarIt.first); + assert(LostMLocIt != ActiveMLocs.end() && + "Variable was using this MLoc, but ActiveMLocs[MLoc] has no " + "entries?"); + LostMLocIt->second.erase(LocVarIt.second); + } // We lazily track what locations have which values; if we've found a new // location for the clobbered value, remember it. @@ -600,9 +868,11 @@ public: flushDbgValues(Pos, nullptr); - // Re-find ActiveMLocIt, iterator could have been invalidated. - ActiveMLocIt = ActiveMLocs.find(MLoc); + // Commit ActiveMLoc changes. ActiveMLocIt->second.clear(); + if (!NewMLocs.empty()) + for (auto &Var : NewMLocs) + ActiveMLocs[*NewLoc].insert(Var); } /// Transfer variables based on \p Src to be based on \p Dst. This handles @@ -619,17 +889,22 @@ public: // Move set of active variables from one location to another. auto MovingVars = ActiveMLocs[Src]; - ActiveMLocs[Dst] = MovingVars; + ActiveMLocs[Dst].insert(MovingVars.begin(), MovingVars.end()); VarLocs[Dst.asU64()] = VarLocs[Src.asU64()]; // For each variable based on Src; create a location at Dst. + ResolvedDbgOp SrcOp(Src); + ResolvedDbgOp DstOp(Dst); for (const auto &Var : MovingVars) { auto ActiveVLocIt = ActiveVLocs.find(Var); assert(ActiveVLocIt != ActiveVLocs.end()); - ActiveVLocIt->second.Loc = Dst; - MachineInstr *MI = - MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties); + // Update all instances of Src in the variable's tracked values to Dst. 
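The clobberMloc changes above and the transferMlocs change that continues below share one idea: when a machine location stops holding a value, look for another location that still holds it and rewrite every affected operand list in place (the patch uses std::replace/replace_copy for that rewrite), dropping the variable only if no such location exists. A compact standalone sketch, with ints standing in for LocIdx and longs for ValueIDNum:

#include <algorithm>
#include <optional>
#include <utility>
#include <vector>

// OldLoc has just been clobbered. If some other location still holds the same
// value, rewrite the variable's operand list to use it; otherwise report that
// the variable's location is lost.
std::optional<std::vector<int>>
retargetAfterClobber(std::vector<int> Ops, int OldLoc, long ClobberedValue,
                     const std::vector<std::pair<int, long>> &Locations) {
  std::optional<int> NewLoc;
  for (const auto &[Loc, Value] : Locations)
    if (Loc != OldLoc && Value == ClobberedValue)
      NewLoc = Loc; // the value survives in another location
  if (!NewLoc)
    return std::nullopt; // nowhere else holds the value: drop the variable
  std::replace(Ops.begin(), Ops.end(), OldLoc, *NewLoc);
  return Ops;
}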
+ std::replace(ActiveVLocIt->second.Ops.begin(), + ActiveVLocIt->second.Ops.end(), SrcOp, DstOp); + + MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var, + ActiveVLocIt->second.Properties); PendingDbgValues.push_back(MI); } ActiveMLocs[Src].clear(); @@ -667,17 +942,43 @@ ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX}; ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1}; #ifndef NDEBUG -void DbgValue::dump(const MLocTracker *MTrack) const { - if (Kind == Const) { - MO->dump(); - } else if (Kind == NoVal) { - dbgs() << "NoVal(" << BlockNo << ")"; - } else if (Kind == VPHI) { - dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")"; +void ResolvedDbgOp::dump(const MLocTracker *MTrack) const { + if (IsConst) { + dbgs() << MO; } else { - assert(Kind == Def); + dbgs() << MTrack->LocIdxToName(Loc); + } +} +void DbgOp::dump(const MLocTracker *MTrack) const { + if (IsConst) { + dbgs() << MO; + } else if (!isUndef()) { dbgs() << MTrack->IDAsString(ID); } +} +void DbgOpID::dump(const MLocTracker *MTrack, const DbgOpIDMap *OpStore) const { + if (!OpStore) { + dbgs() << "ID(" << asU32() << ")"; + } else { + OpStore->find(*this).dump(MTrack); + } +} +void DbgValue::dump(const MLocTracker *MTrack, + const DbgOpIDMap *OpStore) const { + if (Kind == NoVal) { + dbgs() << "NoVal(" << BlockNo << ")"; + } else if (Kind == VPHI || Kind == Def) { + if (Kind == VPHI) + dbgs() << "VPHI(" << BlockNo << ","; + else + dbgs() << "Def("; + for (unsigned Idx = 0; Idx < getDbgOpIDs().size(); ++Idx) { + getDbgOpID(Idx).dump(MTrack, OpStore); + if (Idx != 0) + dbgs() << ","; + } + dbgs() << ")"; + } if (Properties.Indirect) dbgs() << " indir"; if (Properties.DIExpr) @@ -789,14 +1090,14 @@ void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB, Masks.push_back(std::make_pair(MO, InstID)); } -Optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) { +std::optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) { SpillLocationNo SpillID(SpillLocs.idFor(L)); if (SpillID.id() == 0) { // If there is no location, and we have reached the limit of how many stack // slots to track, then don't track this one. if (SpillLocs.size() >= StackWorkingSetLimit) - return None; + return std::nullopt; // Spill location is untracked: create record for this one, and all // subregister slots too. @@ -853,120 +1154,157 @@ LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() { } #endif -MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, - const DebugVariable &Var, - const DbgValueProperties &Properties) { +MachineInstrBuilder +MLocTracker::emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps, + const DebugVariable &Var, + const DbgValueProperties &Properties) { DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, Var.getVariable()->getScope(), const_cast<DILocation *>(Var.getInlinedAt())); - auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE)); + + const MCInstrDesc &Desc = Properties.IsVariadic + ? 
TII.get(TargetOpcode::DBG_VALUE_LIST) + : TII.get(TargetOpcode::DBG_VALUE); + +#ifdef EXPENSIVE_CHECKS + assert(all_of(DbgOps, + [](const ResolvedDbgOp &Op) { + return Op.IsConst || !Op.Loc.isIllegal(); + }) && + "Did not expect illegal ops in DbgOps."); + assert((DbgOps.size() == 0 || + DbgOps.size() == Properties.getLocationOpCount()) && + "Expected to have either one DbgOp per MI LocationOp, or none."); +#endif + + auto GetRegOp = [](unsigned Reg) -> MachineOperand { + return MachineOperand::CreateReg( + /* Reg */ Reg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true); + }; + + SmallVector<MachineOperand> MOs; + + auto EmitUndef = [&]() { + MOs.clear(); + MOs.assign(Properties.getLocationOpCount(), GetRegOp(0)); + return BuildMI(MF, DL, Desc, false, MOs, Var.getVariable(), + Properties.DIExpr); + }; + + // Don't bother passing any real operands to BuildMI if any of them would be + // $noreg. + if (DbgOps.empty()) + return EmitUndef(); + + bool Indirect = Properties.Indirect; const DIExpression *Expr = Properties.DIExpr; - if (!MLoc) { - // No location -> DBG_VALUE $noreg - MIB.addReg(0); - MIB.addReg(0); - } else if (LocIdxToLocID[*MLoc] >= NumRegs) { - unsigned LocID = LocIdxToLocID[*MLoc]; - SpillLocationNo SpillID = locIDToSpill(LocID); - StackSlotPos StackIdx = locIDToSpillIdx(LocID); - unsigned short Offset = StackIdx.second; - - // TODO: support variables that are located in spill slots, with non-zero - // offsets from the start of the spill slot. It would require some more - // complex DIExpression calculations. This doesn't seem to be produced by - // LLVM right now, so don't try and support it. - // Accept no-subregister slots and subregisters where the offset is zero. - // The consumer should already have type information to work out how large - // the variable is. - if (Offset == 0) { - const SpillLoc &Spill = SpillLocs[SpillID.id()]; - unsigned Base = Spill.SpillBase; - MIB.addReg(Base); - - // There are several ways we can dereference things, and several inputs - // to consider: - // * NRVO variables will appear with IsIndirect set, but should have - // nothing else in their DIExpressions, - // * Variables with DW_OP_stack_value in their expr already need an - // explicit dereference of the stack location, - // * Values that don't match the variable size need DW_OP_deref_size, - // * Everything else can just become a simple location expression. - - // We need to use deref_size whenever there's a mismatch between the - // size of value and the size of variable portion being read. - // Additionally, we should use it whenever dealing with stack_value - // fragments, to avoid the consumer having to determine the deref size - // from DW_OP_piece. - bool UseDerefSize = false; - unsigned ValueSizeInBits = getLocSizeInBits(*MLoc); - unsigned DerefSizeInBytes = ValueSizeInBits / 8; - if (auto Fragment = Var.getFragment()) { - unsigned VariableSizeInBits = Fragment->SizeInBits; - if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex()) - UseDerefSize = true; - } else if (auto Size = Var.getVariable()->getSizeInBits()) { - if (*Size != ValueSizeInBits) { - UseDerefSize = true; + + assert(DbgOps.size() == Properties.getLocationOpCount()); + + // If all locations are valid, accumulate them into our list of + // MachineOperands. For any spilled locations, either update the indirectness + // register or apply the appropriate transformations in the DIExpression. 
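The per-operand loop that follows rewrites each spilled operand's DIExpression rather than relying on the two fixed DBG_VALUE operands: it collects offset opcodes, then either marks the location indirect or appends an explicit dereference. A simplified standalone decision table for that choice (the real code additionally checks DIExpression::isSingleLocationExpression before using the sized dereference and routes the opcodes through TRI.getOffsetOpcodes and DIExpression::appendOpsToArg):

#include <cstdint>
#include <vector>

constexpr uint64_t DW_OP_deref = 0x06;
constexpr uint64_t DW_OP_deref_size = 0x94;

struct SpillExprPlan {
  std::vector<uint64_t> OffsetOps; // opcodes appended after the stack offset
  bool StackValue = false;         // also add DW_OP_stack_value
  bool Indirect = false;           // mark the DBG_VALUE itself as indirect
};

SpillExprPlan planSpillDeref(bool IsIndirectProperty, bool UseDerefSize,
                             unsigned DerefSizeInBytes,
                             bool ComplexOrVariadic) {
  SpillExprPlan Plan;
  if (IsIndirectProperty) {
    Plan.OffsetOps.push_back(DW_OP_deref);      // e.g. NRVO pointer spill
  } else if (UseDerefSize) {
    Plan.OffsetOps.push_back(DW_OP_deref_size); // value/variable size mismatch
    Plan.OffsetOps.push_back(DerefSizeInBytes);
    Plan.StackValue = true;
  } else if (ComplexOrVariadic) {
    Plan.OffsetOps.push_back(DW_OP_deref);      // explicit deref in the expr
  } else {
    Plan.Indirect = true;                       // plain spill: indirect location
  }
  return Plan;
}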
+ for (size_t Idx = 0; Idx < Properties.getLocationOpCount(); ++Idx) { + const ResolvedDbgOp &Op = DbgOps[Idx]; + + if (Op.IsConst) { + MOs.push_back(Op.MO); + continue; + } + + LocIdx MLoc = Op.Loc; + unsigned LocID = LocIdxToLocID[MLoc]; + if (LocID >= NumRegs) { + SpillLocationNo SpillID = locIDToSpill(LocID); + StackSlotPos StackIdx = locIDToSpillIdx(LocID); + unsigned short Offset = StackIdx.second; + + // TODO: support variables that are located in spill slots, with non-zero + // offsets from the start of the spill slot. It would require some more + // complex DIExpression calculations. This doesn't seem to be produced by + // LLVM right now, so don't try and support it. + // Accept no-subregister slots and subregisters where the offset is zero. + // The consumer should already have type information to work out how large + // the variable is. + if (Offset == 0) { + const SpillLoc &Spill = SpillLocs[SpillID.id()]; + unsigned Base = Spill.SpillBase; + + // There are several ways we can dereference things, and several inputs + // to consider: + // * NRVO variables will appear with IsIndirect set, but should have + // nothing else in their DIExpressions, + // * Variables with DW_OP_stack_value in their expr already need an + // explicit dereference of the stack location, + // * Values that don't match the variable size need DW_OP_deref_size, + // * Everything else can just become a simple location expression. + + // We need to use deref_size whenever there's a mismatch between the + // size of value and the size of variable portion being read. + // Additionally, we should use it whenever dealing with stack_value + // fragments, to avoid the consumer having to determine the deref size + // from DW_OP_piece. + bool UseDerefSize = false; + unsigned ValueSizeInBits = getLocSizeInBits(MLoc); + unsigned DerefSizeInBytes = ValueSizeInBits / 8; + if (auto Fragment = Var.getFragment()) { + unsigned VariableSizeInBits = Fragment->SizeInBits; + if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex()) + UseDerefSize = true; + } else if (auto Size = Var.getVariable()->getSizeInBits()) { + if (*Size != ValueSizeInBits) { + UseDerefSize = true; + } } - } - if (Properties.Indirect) { - // This is something like an NRVO variable, where the pointer has been - // spilt to the stack, or a dbg.addr pointing at a coroutine frame - // field. It should end up being a memory location, with the pointer - // to the variable loaded off the stack with a deref. It can't be a - // DW_OP_stack_value expression. - assert(!Expr->isImplicit()); - Expr = TRI.prependOffsetExpression( - Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter, - Spill.SpillOffset); - MIB.addImm(0); - } else if (UseDerefSize) { - // We're loading a value off the stack that's not the same size as the - // variable. Add / subtract stack offset, explicitly deref with a size, - // and add DW_OP_stack_value if not already present. - SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, - DerefSizeInBytes}; - Expr = DIExpression::prependOpcodes(Expr, Ops, true); - unsigned Flags = DIExpression::StackValue | DIExpression::ApplyOffset; - Expr = TRI.prependOffsetExpression(Expr, Flags, Spill.SpillOffset); - MIB.addReg(0); - } else if (Expr->isComplex()) { - // A variable with no size ambiguity, but with extra elements in it's - // expression. Manually dereference the stack location. 
- assert(Expr->isComplex()); - Expr = TRI.prependOffsetExpression( - Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter, - Spill.SpillOffset); - MIB.addReg(0); + SmallVector<uint64_t, 5> OffsetOps; + TRI.getOffsetOpcodes(Spill.SpillOffset, OffsetOps); + bool StackValue = false; + + if (Properties.Indirect) { + // This is something like an NRVO variable, where the pointer has been + // spilt to the stack. It should end up being a memory location, with + // the pointer to the variable loaded off the stack with a deref: + assert(!Expr->isImplicit()); + OffsetOps.push_back(dwarf::DW_OP_deref); + } else if (UseDerefSize && Expr->isSingleLocationExpression()) { + // TODO: Figure out how to handle deref size issues for variadic + // values. + // We're loading a value off the stack that's not the same size as the + // variable. Add / subtract stack offset, explicitly deref with a + // size, and add DW_OP_stack_value if not already present. + OffsetOps.push_back(dwarf::DW_OP_deref_size); + OffsetOps.push_back(DerefSizeInBytes); + StackValue = true; + } else if (Expr->isComplex() || Properties.IsVariadic) { + // A variable with no size ambiguity, but with extra elements in it's + // expression. Manually dereference the stack location. + OffsetOps.push_back(dwarf::DW_OP_deref); + } else { + // A plain value that has been spilt to the stack, with no further + // context. Request a location expression, marking the DBG_VALUE as + // IsIndirect. + Indirect = true; + } + + Expr = DIExpression::appendOpsToArg(Expr, OffsetOps, Idx, StackValue); + MOs.push_back(GetRegOp(Base)); } else { - // A plain value that has been spilt to the stack, with no further - // context. Request a location expression, marking the DBG_VALUE as - // IsIndirect. - Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset, - Spill.SpillOffset); - MIB.addImm(0); + // This is a stack location with a weird subregister offset: emit an + // undef DBG_VALUE instead. + return EmitUndef(); } } else { - // This is a stack location with a weird subregister offset: emit an undef - // DBG_VALUE instead. - MIB.addReg(0); - MIB.addReg(0); + // Non-empty, non-stack slot, must be a plain register. + MOs.push_back(GetRegOp(LocID)); } - } else { - // Non-empty, non-stack slot, must be a plain register. - unsigned LocID = LocIdxToLocID[*MLoc]; - MIB.addReg(LocID); - if (Properties.Indirect) - MIB.addImm(0); - else - MIB.addReg(0); } - MIB.addMetadata(Var.getVariable()); - MIB.addMetadata(Expr); - return MIB; + return BuildMI(MF, DL, Desc, Indirect, MOs, Var.getVariable(), Expr); } /// Default construct and initialize the pass. @@ -974,7 +1312,10 @@ InstrRefBasedLDV::InstrRefBasedLDV() = default; bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const { unsigned Reg = MTracker->LocIdxToLocID[L]; - for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) + return isCalleeSavedReg(Reg); +} +bool InstrRefBasedLDV::isCalleeSavedReg(Register R) const { + for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) if (CalleeSavedRegs.test(*RAI)) return true; return false; @@ -989,7 +1330,7 @@ bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const { // void InstrRefBasedLDV::printVarLocInMBB(..) 
#endif -Optional<SpillLocationNo> +std::optional<SpillLocationNo> InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { assert(MI.hasOneMemOperand() && "Spill instruction does not have exactly one memory operand?"); @@ -1004,11 +1345,11 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return MTracker->getOrTrackSpillLoc({Reg, Offset}); } -Optional<LocIdx> +std::optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { - Optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI); + std::optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI); if (!SpillLoc) - return None; + return std::nullopt; // Where in the stack slot is this value defined -- i.e., what size of value // is this? An important question, because it could be loaded into a register @@ -1022,7 +1363,7 @@ InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { if (IdxIt == MTracker->StackSlotIdxes.end()) // That index is not tracked. This is suprising, and unlikely to ever // occur, but the safe action is to indicate the variable is optimised out. - return None; + return std::nullopt; unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second); return MTracker->getSpillMLoc(SpillID); @@ -1050,39 +1391,33 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { if (Scope == nullptr) return true; // handled it; by doing nothing - // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow it to - // contribute to locations in this block, but don't propagate further. - // Interpret it like a DBG_VALUE $noreg. - if (MI.isDebugValueList()) { - if (VTracker) - VTracker->defVar(MI, Properties, None); - if (TTracker) - TTracker->redefVar(MI, Properties, None); - return true; - } - - const MachineOperand &MO = MI.getOperand(0); - // MLocTracker needs to know that this register is read, even if it's only // read by a debug inst. - if (MO.isReg() && MO.getReg() != 0) - (void)MTracker->readReg(MO.getReg()); + for (const MachineOperand &MO : MI.debug_operands()) + if (MO.isReg() && MO.getReg() != 0) + (void)MTracker->readReg(MO.getReg()); // If we're preparing for the second analysis (variables), the machine value // locations are already solved, and we report this DBG_VALUE and the value // it refers to to VLocTracker. if (VTracker) { - if (MO.isReg()) { - // Feed defVar the new variable location, or if this is a - // DBG_VALUE $noreg, feed defVar None. - if (MO.getReg()) - VTracker->defVar(MI, Properties, MTracker->readReg(MO.getReg())); - else - VTracker->defVar(MI, Properties, None); - } else if (MI.getOperand(0).isImm() || MI.getOperand(0).isFPImm() || - MI.getOperand(0).isCImm()) { - VTracker->defVar(MI, MI.getOperand(0)); + SmallVector<DbgOpID> DebugOps; + // Feed defVar the new variable location, or if this is a DBG_VALUE $noreg, + // feed defVar None. + if (!MI.isUndefDebugValue()) { + for (const MachineOperand &MO : MI.debug_operands()) { + // There should be no undef registers here, as we've screened for undef + // debug values. 
+ if (MO.isReg()) { + DebugOps.push_back(DbgOpStore.insert(MTracker->readReg(MO.getReg()))); + } else if (MO.isImm() || MO.isFPImm() || MO.isCImm()) { + DebugOps.push_back(DbgOpStore.insert(MO)); + } else { + llvm_unreachable("Unexpected debug operand type."); + } + } } + VTracker->defVar(MI, Properties, DebugOps); } // If performing final tracking of transfers, report this variable definition @@ -1092,39 +1427,14 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { return true; } -bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, - const ValueTable *MLiveOuts, - const ValueTable *MLiveIns) { - if (!MI.isDebugRef()) - return false; - - // Only handle this instruction when we are building the variable value - // transfer function. - if (!VTracker && !TTracker) - return false; - - unsigned InstNo = MI.getOperand(0).getImm(); - unsigned OpNo = MI.getOperand(1).getImm(); - - const DILocalVariable *Var = MI.getDebugVariable(); - const DIExpression *Expr = MI.getDebugExpression(); - const DILocation *DebugLoc = MI.getDebugLoc(); - const DILocation *InlinedAt = DebugLoc->getInlinedAt(); - assert(Var->isValidLocationForIntrinsic(DebugLoc) && - "Expected inlined-at fields to agree"); - - DebugVariable V(Var, Expr, InlinedAt); - - auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get()); - if (Scope == nullptr) - return true; // Handled by doing nothing. This variable is never in scope. - - const MachineFunction &MF = *MI.getParent()->getParent(); - +std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef( + unsigned InstNo, unsigned OpNo, MachineInstr &MI, + const ValueTable *MLiveOuts, const ValueTable *MLiveIns) { // Various optimizations may have happened to the value during codegen, // recorded in the value substitution table. Apply any substitutions to // the instruction / operand number in this DBG_INSTR_REF, and collect // any subregister extractions performed during optimization. + const MachineFunction &MF = *MI.getParent()->getParent(); // Create dummy substitution with Src set, for lookup. auto SoughtSub = @@ -1143,13 +1453,12 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // Default machine value number is <None> -- if no instruction defines // the corresponding value, it must have been optimized out. - Optional<ValueIDNum> NewID = None; + std::optional<ValueIDNum> NewID; // Try to lookup the instruction number, and find the machine value number // that it defines. It could be an instruction, or a PHI. auto InstrIt = DebugInstrNumToInstr.find(InstNo); - auto PHIIt = std::lower_bound(DebugPHINumToValue.begin(), - DebugPHINumToValue.end(), InstNo); + auto PHIIt = llvm::lower_bound(DebugPHINumToValue, InstNo); if (InstrIt != DebugInstrNumToInstr.end()) { const MachineInstr &TargetInstr = *InstrIt->second.first; uint64_t BlockNo = TargetInstr.getParent()->getNumber(); @@ -1158,7 +1467,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // a register def was folded into a stack store. if (OpNo == MachineFunction::DebugOperandMemNumber && TargetInstr.hasOneMemOperand()) { - Optional<LocIdx> L = findLocationForMemOperand(TargetInstr); + std::optional<LocIdx> L = findLocationForMemOperand(TargetInstr); if (L) NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L); } else if (OpNo != MachineFunction::DebugOperandMemNumber) { @@ -1247,7 +1556,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // If we didn't find anything: there's no way to express our value. 
if (!NewReg) { - NewID = None; + NewID = std::nullopt; } else { // Re-state the value as being defined within the subregister // that we found. @@ -1257,61 +1566,162 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, } } else { // If we can't handle subregisters, unset the new value. - NewID = None; + NewID = std::nullopt; } } - // We, we have a value number or None. Tell the variable value tracker about - // it. The rest of this LiveDebugValues implementation acts exactly the same - // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that - // aren't immediately available). - DbgValueProperties Properties(Expr, false); + return NewID; +} + +bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns) { + if (!MI.isDebugRef()) + return false; + + // Only handle this instruction when we are building the variable value + // transfer function. + if (!VTracker && !TTracker) + return false; + + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *Expr = MI.getDebugExpression(); + const DILocation *DebugLoc = MI.getDebugLoc(); + const DILocation *InlinedAt = DebugLoc->getInlinedAt(); + assert(Var->isValidLocationForIntrinsic(DebugLoc) && + "Expected inlined-at fields to agree"); + + DebugVariable V(Var, Expr, InlinedAt); + + auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get()); + if (Scope == nullptr) + return true; // Handled by doing nothing. This variable is never in scope. + + SmallVector<DbgOpID> DbgOpIDs; + for (const MachineOperand &MO : MI.debug_operands()) { + if (!MO.isDbgInstrRef()) { + assert(!MO.isReg() && "DBG_INSTR_REF should not contain registers"); + DbgOpID ConstOpID = DbgOpStore.insert(DbgOp(MO)); + DbgOpIDs.push_back(ConstOpID); + continue; + } + + unsigned InstNo = MO.getInstrRefInstrIndex(); + unsigned OpNo = MO.getInstrRefOpIndex(); + + // Default machine value number is <None> -- if no instruction defines + // the corresponding value, it must have been optimized out. + std::optional<ValueIDNum> NewID = + getValueForInstrRef(InstNo, OpNo, MI, MLiveOuts, MLiveIns); + // We have a value number or std::nullopt. If the latter, then kill the + // entire debug value. + if (NewID) { + DbgOpIDs.push_back(DbgOpStore.insert(*NewID)); + } else { + DbgOpIDs.clear(); + break; + } + } + + // We have a DbgOpID for every value or for none. Tell the variable value + // tracker about it. The rest of this LiveDebugValues implementation acts + // exactly the same for DBG_INSTR_REFs as DBG_VALUEs (just, the former can + // refer to values that aren't immediately available). + DbgValueProperties Properties(Expr, false, true); if (VTracker) - VTracker->defVar(MI, Properties, NewID); + VTracker->defVar(MI, Properties, DbgOpIDs); // If we're on the final pass through the function, decompose this INSTR_REF // into a plain DBG_VALUE. if (!TTracker) return true; + // Fetch the concrete DbgOps now, as we will need them later. + SmallVector<DbgOp> DbgOps; + for (DbgOpID OpID : DbgOpIDs) { + DbgOps.push_back(DbgOpStore.find(OpID)); + } + // Pick a location for the machine value number, if such a location exists. // (This information could be stored in TransferTracker to make it faster). - Optional<LocIdx> FoundLoc = None; + SmallDenseMap<ValueIDNum, TransferTracker::LocationAndQuality> FoundLocs; + SmallVector<ValueIDNum> ValuesToFind; + // Initialized the preferred-location map with illegal locations, to be + // filled in later. 
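The code that follows first seeds FoundLocs with illegal placeholders, then walks every machine location once, keeps the best-quality candidate per wanted value, and stops as soon as every value has a best-quality (spill slot) home. A toy standalone version of that early-exit scan, with ints for LocIdx and longs for ValueIDNum:

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

enum Quality { Illegal = 0, Reg = 1, CalleeSaved = 2, Spill = 3, Best = Spill };

struct Location { int Idx; long Value; Quality Q; };

// Found maps each wanted value to the best (location, quality) seen so far;
// ValuesToFind shrinks as values reach the best possible quality.
void pickLocations(const std::vector<Location> &Locations,
                   std::map<long, std::pair<int, Quality>> &Found,
                   std::vector<long> &ValuesToFind) {
  for (const Location &L : Locations) {
    auto It = std::find(ValuesToFind.begin(), ValuesToFind.end(), L.Value);
    if (It == ValuesToFind.end())
      continue;                 // nobody wants this value
    auto &Prev = Found[L.Value];
    if (L.Q <= Prev.second)
      continue;                 // not an improvement over what we have
    Prev = {L.Idx, L.Q};
    if (L.Q == Best) {
      ValuesToFind.erase(It);   // this value is settled
      if (ValuesToFind.empty())
        break;                  // everything settled: stop scanning
    }
  }
}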
+ for (const DbgOp &Op : DbgOps) { + if (!Op.IsConst) + if (FoundLocs.insert({Op.ID, TransferTracker::LocationAndQuality()}) + .second) + ValuesToFind.push_back(Op.ID); + } + for (auto Location : MTracker->locations()) { LocIdx CurL = Location.Idx; ValueIDNum ID = MTracker->readMLoc(CurL); - if (NewID && ID == NewID) { - // If this is the first location with that value, pick it. Otherwise, - // consider whether it's a "longer term" location. - if (!FoundLoc) { - FoundLoc = CurL; - continue; + auto ValueToFindIt = find(ValuesToFind, ID); + if (ValueToFindIt == ValuesToFind.end()) + continue; + auto &Previous = FoundLocs.find(ID)->second; + // If this is the first location with that value, pick it. Otherwise, + // consider whether it's a "longer term" location. + std::optional<TransferTracker::LocationQuality> ReplacementQuality = + TTracker->getLocQualityIfBetter(CurL, Previous.getQuality()); + if (ReplacementQuality) { + Previous = TransferTracker::LocationAndQuality(CurL, *ReplacementQuality); + if (Previous.isBest()) { + ValuesToFind.erase(ValueToFindIt); + if (ValuesToFind.empty()) + break; } - - if (MTracker->isSpill(CurL)) - FoundLoc = CurL; // Spills are a longer term location. - else if (!MTracker->isSpill(*FoundLoc) && - !MTracker->isSpill(CurL) && - !isCalleeSaved(*FoundLoc) && - isCalleeSaved(CurL)) - FoundLoc = CurL; // Callee saved regs are longer term than normal. } } + SmallVector<ResolvedDbgOp> NewLocs; + for (const DbgOp &DbgOp : DbgOps) { + if (DbgOp.IsConst) { + NewLocs.push_back(DbgOp.MO); + continue; + } + LocIdx FoundLoc = FoundLocs.find(DbgOp.ID)->second.getLoc(); + if (FoundLoc.isIllegal()) { + NewLocs.clear(); + break; + } + NewLocs.push_back(FoundLoc); + } // Tell transfer tracker that the variable value has changed. - TTracker->redefVar(MI, Properties, FoundLoc); - - // If there was a value with no location; but the value is defined in a - // later instruction in this block, this is a block-local use-before-def. - if (!FoundLoc && NewID && NewID->getBlock() == CurBB && - NewID->getInst() > CurInst) - TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false}, *NewID); + TTracker->redefVar(MI, Properties, NewLocs); + + // If there were values with no location, but all such values are defined in + // later instructions in this block, this is a block-local use-before-def. + if (!DbgOps.empty() && NewLocs.empty()) { + bool IsValidUseBeforeDef = true; + uint64_t LastUseBeforeDef = 0; + for (auto ValueLoc : FoundLocs) { + ValueIDNum NewID = ValueLoc.first; + LocIdx FoundLoc = ValueLoc.second.getLoc(); + if (!FoundLoc.isIllegal()) + continue; + // If we have an value with no location that is not defined in this block, + // then it has no location in this block, leaving this value undefined. + if (NewID.getBlock() != CurBB || NewID.getInst() <= CurInst) { + IsValidUseBeforeDef = false; + break; + } + LastUseBeforeDef = std::max(LastUseBeforeDef, NewID.getInst()); + } + if (IsValidUseBeforeDef) { + TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false, true}, + DbgOps, LastUseBeforeDef); + } + } // Produce a DBG_VALUE representing what this DBG_INSTR_REF meant. // This DBG_VALUE is potentially a $noreg / undefined location, if - // FoundLoc is None. + // FoundLoc is illegal. // (XXX -- could morph the DBG_INSTR_REF in the future). 
- MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties); + MachineInstr *DbgMI = MTracker->emitLoc(NewLocs, V, Properties); + TTracker->PendingDbgValues.push_back(DbgMI); TTracker->flushDbgValues(MI.getIterator(), nullptr); return true; @@ -1335,7 +1745,8 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a // dead stack slot, for example. // Record a DebugPHIRecord with an empty value + location. - DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None}); + DebugPHINumToValue.push_back( + {InstrNum, MI.getParent(), std::nullopt, std::nullopt}); return true; }; @@ -1364,7 +1775,7 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { Register Base; StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base); SpillLoc SL = {Base, Offs}; - Optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL); + std::optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL); // We might be able to find a value, but have chosen not to, to avoid // tracking too much stack information. @@ -1437,8 +1848,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { SmallVector<const MachineOperand *, 4> RegMaskPtrs; for (const MachineOperand &MO : MI.operands()) { // Determine whether the operand is a register def. - if (MO.isReg() && MO.isDef() && MO.getReg() && - Register::isPhysicalRegister(MO.getReg()) && + if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical() && !IgnoreSPAlias(MO.getReg())) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) @@ -1459,7 +1869,8 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // If this instruction writes to a spill slot, def that slot. if (hasFoldedStackStore(MI)) { - if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) { + if (std::optional<SpillLocationNo> SpillNo = + extractSpillBaseRegAndOffset(MI)) { for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I); LocIdx L = MTracker->getSpillMLoc(SpillID); @@ -1501,7 +1912,8 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // Tell TTracker about any folded stack store. if (hasFoldedStackStore(MI)) { - if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) { + if (std::optional<SpillLocationNo> SpillNo = + extractSpillBaseRegAndOffset(MI)) { for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I); LocIdx L = MTracker->getSpillMLoc(SpillID); @@ -1542,22 +1954,22 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { } } -Optional<SpillLocationNo> +std::optional<SpillLocationNo> InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI, MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) - return None; + return std::nullopt; // Reject any memory operand that's aliased -- we can't guarantee its value. auto MMOI = MI.memoperands_begin(); const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); if (PVal->isAliased(MFI)) - return None; + return std::nullopt; if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII)) - return None; // This is not a spill instruction, since no valid size was - // returned from either function. 
+ return std::nullopt; // This is not a spill instruction, since no valid size + // was returned from either function. return extractSpillBaseRegAndOffset(MI); } @@ -1572,11 +1984,11 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI, return Reg != 0; } -Optional<SpillLocationNo> +std::optional<SpillLocationNo> InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg) { if (!MI.hasOneMemOperand()) - return None; + return std::nullopt; // FIXME: Handle folded restore instructions with more than one memory // operand. @@ -1584,7 +1996,7 @@ InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI, Reg = MI.getOperand(0).getReg(); return extractSpillBaseRegAndOffset(MI); } - return None; + return std::nullopt; } bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { @@ -1616,12 +2028,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { // First, if there are any DBG_VALUEs pointing at a spill slot that is // written to, terminate that variable location. The value in memory // will have changed. DbgEntityHistoryCalculator doesn't try to detect this. - if (Optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) { + if (std::optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) { // Un-set this location and clobber, so that earlier locations don't // continue past this store. for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) { unsigned SpillID = MTracker->getSpillIDWithIdx(*Loc, SlotIdx); - Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID); + std::optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID); if (!MLoc) continue; @@ -1667,7 +2079,7 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { unsigned SpillID = MTracker->getLocID(Loc, {Size, 0}); DoTransfer(Reg, SpillID); } else { - Optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg); + std::optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg); if (!Loc) return false; @@ -1711,13 +2123,6 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { const MachineOperand *DestRegOp = DestSrc->Destination; const MachineOperand *SrcRegOp = DestSrc->Source; - auto isCalleeSavedReg = [&](unsigned Reg) { - for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) - if (CalleeSavedRegs.test(*RAI)) - return true; - return false; - }; - Register SrcReg = SrcRegOp->getReg(); Register DestReg = DestRegOp->getReg(); @@ -1791,7 +2196,7 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { /// \param MI A previously unprocessed debug instruction to analyze for /// fragment usage. void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { - assert(MI.isDebugValue() || MI.isDebugRef()); + assert(MI.isDebugValueLike()); DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); @@ -1896,7 +2301,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction( process(MI, nullptr, nullptr); // Also accumulate fragment map. - if (MI.isDebugValue() || MI.isDebugRef()) + if (MI.isDebugValueLike()) accumulateFragmentMap(MI); // Create a map from the instruction number (if present) to the @@ -1931,7 +2336,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction( Result.first->second = P; } - // Accumulate any bitmask operands into the clobberred reg mask for this + // Accumulate any bitmask operands into the clobbered reg mask for this // block. 
for (auto &P : MTracker->Masks) { BlockMasks[CurBB].clearBitsNotInMask(P.first->getRegMask(), BVWords); @@ -2353,33 +2758,104 @@ void InstrRefBasedLDV::BlockPHIPlacement( IDF.calculate(PHIBlocks); } -Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( - const MachineBasicBlock &MBB, const DebugVariable &Var, +bool InstrRefBasedLDV::pickVPHILoc( + SmallVectorImpl<DbgOpID> &OutValues, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs, const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) { - // Collect a set of locations from predecessor where its live-out value can - // be found. - SmallVector<SmallVector<LocIdx, 4>, 8> Locs; - SmallVector<const DbgValueProperties *, 4> Properties; - unsigned NumLocs = MTracker->getNumLocs(); // No predecessors means no PHIs. if (BlockOrders.empty()) - return None; + return false; - for (const auto *p : BlockOrders) { - unsigned ThisBBNum = p->getNumber(); + // All the location operands that do not already agree need to be joined, + // track the indices of each such location operand here. + SmallDenseSet<unsigned> LocOpsToJoin; + + auto FirstValueIt = LiveOuts.find(BlockOrders[0]); + if (FirstValueIt == LiveOuts.end()) + return false; + const DbgValue &FirstValue = *FirstValueIt->second; + + for (const auto p : BlockOrders) { auto OutValIt = LiveOuts.find(p); if (OutValIt == LiveOuts.end()) // If we have a predecessor not in scope, we'll never find a PHI position. - return None; + return false; const DbgValue &OutVal = *OutValIt->second; - if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal) - // Consts and no-values cannot have locations we can join on. - return None; + // No-values cannot have locations we can join on. + if (OutVal.Kind == DbgValue::NoVal) + return false; + + // For unjoined VPHIs where we don't know the location, we definitely + // can't find a join loc unless the VPHI is a backedge. + if (OutVal.isUnjoinedPHI() && OutVal.BlockNo != MBB.getNumber()) + return false; + + if (!FirstValue.Properties.isJoinable(OutVal.Properties)) + return false; + + for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) { + // An unjoined PHI has no defined locations, and so a shared location must + // be found for every operand. + if (OutVal.isUnjoinedPHI()) { + LocOpsToJoin.insert(Idx); + continue; + } + DbgOpID FirstValOp = FirstValue.getDbgOpID(Idx); + DbgOpID OutValOp = OutVal.getDbgOpID(Idx); + if (FirstValOp != OutValOp) { + // We can never join constant ops - the ops must either both be equal + // constant ops or non-const ops. + if (FirstValOp.isConst() || OutValOp.isConst()) + return false; + else + LocOpsToJoin.insert(Idx); + } + } + } + + SmallVector<DbgOpID> NewDbgOps; - Properties.push_back(&OutVal.Properties); + for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) { + // If this op doesn't need to be joined because the values agree, use that + // already-agreed value. 
+ if (!LocOpsToJoin.contains(Idx)) { + NewDbgOps.push_back(FirstValue.getDbgOpID(Idx)); + continue; + } + + std::optional<ValueIDNum> JoinedOpLoc = + pickOperandPHILoc(Idx, MBB, LiveOuts, MOutLocs, BlockOrders); + + if (!JoinedOpLoc) + return false; + + NewDbgOps.push_back(DbgOpStore.insert(*JoinedOpLoc)); + } + + OutValues.append(NewDbgOps); + return true; +} + +std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc( + unsigned DbgOpIdx, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts, + FuncValueTable &MOutLocs, + const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) { + + // Collect a set of locations from predecessor where its live-out value can + // be found. + SmallVector<SmallVector<LocIdx, 4>, 8> Locs; + unsigned NumLocs = MTracker->getNumLocs(); + + for (const auto p : BlockOrders) { + unsigned ThisBBNum = p->getNumber(); + auto OutValIt = LiveOuts.find(p); + assert(OutValIt != LiveOuts.end()); + const DbgValue &OutVal = *OutValIt->second; + DbgOpID OutValOpID = OutVal.getDbgOpID(DbgOpIdx); + DbgOp OutValOp = DbgOpStore.find(OutValOpID); + assert(!OutValOp.IsConst); // Create new empty vector of locations. Locs.resize(Locs.size() + 1); @@ -2388,8 +2864,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( // present. Do the same for VPHIs where we know the VPHI value. if (OutVal.Kind == DbgValue::Def || (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() && - OutVal.ID != ValueIDNum::EmptyValue)) { - ValueIDNum ValToLookFor = OutVal.ID; + !OutValOp.isUndef())) { + ValueIDNum ValToLookFor = OutValOp.ID; // Search the live-outs of the predecessor for the specified value. for (unsigned int I = 0; I < NumLocs; ++I) { if (MOutLocs[ThisBBNum][I] == ValToLookFor) @@ -2397,11 +2873,6 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( } } else { assert(OutVal.Kind == DbgValue::VPHI); - // For VPHIs where we don't know the location, we definitely can't find - // a join loc. - if (OutVal.BlockNo != MBB.getNumber()) - return None; - // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e. // a value that's live-through the whole loop. (It has to be a backedge, // because a block can't dominate itself). We can accept as a PHI location @@ -2415,17 +2886,9 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( } } } - // We should have found locations for all predecessors, or returned. assert(Locs.size() == BlockOrders.size()); - // Check that all properties are the same. We can't pick a location if they're - // not. - const DbgValueProperties *Properties0 = Properties[0]; - for (const auto *Prop : Properties) - if (*Prop != *Properties0) - return None; - // Starting with the first set of locations, take the intersection with // subsequent sets. SmallVector<LocIdx, 4> CandidateLocs = Locs[0]; @@ -2437,7 +2900,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( CandidateLocs = NewCandidates; } if (CandidateLocs.empty()) - return None; + return std::nullopt; // We now have a set of LocIdxes that contain the right output value in // each of the predecessors. Pick the lowest; if there's a register loc, @@ -2516,12 +2979,12 @@ bool InstrRefBasedLDV::vlocJoin( // Scan for variable values that can never be resolved: if they have // different DIExpressions, different indirectness, or are mixed constants / // non-constants. 
- for (auto &V : Values) { - if (V.second->Properties != FirstVal.Properties) + for (const auto &V : Values) { + if (!V.second->Properties.isJoinable(FirstVal.Properties)) return false; if (V.second->Kind == DbgValue::NoVal) return false; - if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const) + if (!V.second->hasJoinableLocOps(FirstVal)) return false; } @@ -2531,6 +2994,12 @@ bool InstrRefBasedLDV::vlocJoin( if (*V.second == FirstVal) continue; // No disagreement. + // If both values are not equal but have equal non-empty IDs then they refer + // to the same value from different sources (e.g. one is VPHI and the other + // is Def), which does not cause disagreement. + if (V.second->hasIdenticalValidLocOps(FirstVal)) + continue; + // Eliminate if a backedge feeds a VPHI back into itself. if (V.second->Kind == DbgValue::VPHI && V.second->BlockNo == MBB.getNumber() && @@ -2675,7 +3144,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // Initialize all values to start as NoVals. This signifies "it's live // through, but we don't know what it is". - DbgValueProperties EmptyProperties(EmptyExpr, false); + DbgValueProperties EmptyProperties(EmptyExpr, false, false); for (unsigned int I = 0; I < NumBlocks; ++I) { DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal); LiveIns.push_back(EmptyDbgValue); @@ -2775,12 +3244,13 @@ void InstrRefBasedLDV::buildVLocValueMap( // eliminated and transitions from VPHI-with-location to // live-through-value. As a result, the selected location of any VPHI // might change, so we need to re-compute it on each iteration. - Optional<ValueIDNum> ValueNum = - pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds); + SmallVector<DbgOpID> JoinedOps; - if (ValueNum) { - InLocsChanged |= LiveIn->ID != *ValueNum; - LiveIn->ID = *ValueNum; + if (pickVPHILoc(JoinedOps, *MBB, LiveOutIdx, MOutLocs, Preds)) { + bool NewLocPicked = !equal(LiveIn->getDbgOpIDs(), JoinedOps); + InLocsChanged |= NewLocPicked; + if (NewLocPicked) + LiveIn->setDbgOpIDs(JoinedOps); } } @@ -2850,8 +3320,7 @@ void InstrRefBasedLDV::buildVLocValueMap( DbgValue *BlockLiveIn = LiveInIdx[MBB]; if (BlockLiveIn->Kind == DbgValue::NoVal) continue; - if (BlockLiveIn->Kind == DbgValue::VPHI && - BlockLiveIn->ID == ValueIDNum::EmptyValue) + if (BlockLiveIn->isUnjoinedPHI()) continue; if (BlockLiveIn->Kind == DbgValue::VPHI) BlockLiveIn->Kind = DbgValue::Def; @@ -2933,12 +3402,17 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { // Compute mappings of block <=> RPO order. ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); unsigned int RPONumber = 0; - for (MachineBasicBlock *MBB : RPOT) { + auto processMBB = [&](MachineBasicBlock *MBB) { OrderToBB[RPONumber] = MBB; BBToOrder[MBB] = RPONumber; BBNumToRPO[MBB->getNumber()] = RPONumber; ++RPONumber; - } + }; + for (MachineBasicBlock *MBB : RPOT) + processMBB(MBB); + for (MachineBasicBlock &MBB : MF) + if (BBToOrder.find(&MBB) == BBToOrder.end()) + processMBB(&MBB); // Order value substitutions by their "source" operand pair, for quick lookup. llvm::sort(MF.DebugValueSubstitutions); @@ -3037,7 +3511,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( // instructions, installing transfers. 
MTracker->reset(); MTracker->loadFromArray(MInLocs[BBNum], BBNum); - TTracker->loadInlocs(MBB, MInLocs[BBNum], Output[BBNum], NumLocs); + TTracker->loadInlocs(MBB, MInLocs[BBNum], DbgOpStore, Output[BBNum], + NumLocs); CurBB = BBNum; CurInst = 1; @@ -3335,6 +3810,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, OverlapFragments.clear(); SeenFragments.clear(); SeenDbgPHIs.clear(); + DbgOpStore.clear(); return Changed; } @@ -3596,7 +4072,7 @@ public: } // end namespace llvm -Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs( +std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs( MachineFunction &MF, const ValueTable *MLiveOuts, const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) { assert(MLiveOuts && MLiveIns && @@ -3605,17 +4081,17 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs( // This function will be called twice per DBG_INSTR_REF, and might end up // computing lots of SSA information: memoize it. - auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here); + auto SeenDbgPHIIt = SeenDbgPHIs.find(std::make_pair(&Here, InstrNum)); if (SeenDbgPHIIt != SeenDbgPHIs.end()) return SeenDbgPHIIt->second; - Optional<ValueIDNum> Result = + std::optional<ValueIDNum> Result = resolveDbgPHIsImpl(MF, MLiveOuts, MLiveIns, Here, InstrNum); - SeenDbgPHIs.insert({&Here, Result}); + SeenDbgPHIs.insert({std::make_pair(&Here, InstrNum), Result}); return Result; } -Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( +std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( MachineFunction &MF, const ValueTable *MLiveOuts, const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) { // Pick out records of DBG_PHI instructions that have been observed. If there @@ -3627,7 +4103,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( // No DBG_PHI means there can be no location. if (LowerIt == UpperIt) - return None; + return std::nullopt; // If any DBG_PHIs referred to a location we didn't understand, don't try to // compute a value. There might be scenarios where we could recover a value @@ -3636,7 +4112,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( auto DBGPHIRange = make_range(LowerIt, UpperIt); for (const DebugPHIRecord &DBG_PHI : DBGPHIRange) if (!DBG_PHI.ValueRead) - return None; + return std::nullopt; // If there's only one DBG_PHI, then that is our value number. if (std::distance(LowerIt, UpperIt) == 1) @@ -3720,7 +4196,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( for (auto &PHIIt : PHI->IncomingValues) { // Any undef input means DBG_PHIs didn't dominate the use point. if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end()) - return None; + return std::nullopt; ValueIDNum ValueToCheck; const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; @@ -3739,7 +4215,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( } if (BlockLiveOuts[Loc.asU64()] != ValueToCheck) - return None; + return std::nullopt; } // Record this value as validated. 
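The hunks above route every debug operand through the new DbgOpStore: an operand is interned once and is then passed around as a compact DbgOpID, with insert() handing back the existing ID when the same operand is seen again. As a rough standalone illustration of that interning pattern only (InternPool, Value, intern and lookup below are hypothetical names, not the LLVM implementation), the same two-way lookup can be modelled as:

#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for the interned payload; the real patch stores
// either a machine value number or a constant MachineOperand.
using Value = std::uint64_t;

class InternPool {
  std::vector<Value> Values;                      // ID -> payload
  std::unordered_map<Value, std::uint32_t> ToID;  // payload -> ID

public:
  // Return the existing ID for V, or mint a new one. Equal payloads always
  // receive equal IDs, so later equality tests can compare small IDs instead
  // of the full payload.
  std::uint32_t intern(Value V) {
    auto It = ToID.find(V);
    if (It != ToID.end())
      return It->second;
    std::uint32_t ID = static_cast<std::uint32_t>(Values.size());
    Values.push_back(V);
    ToID.emplace(V, ID);
    return ID;
  }

  // Only valid for IDs previously returned by intern().
  Value lookup(std::uint32_t ID) const {
    assert(ID < Values.size() && "unknown ID");
    return Values[ID];
  }
};

Keeping per-variable state down to a fixed-size array of such IDs is what lets the DbgValue class in the header diff below stay small enough to copy freely; the MAX_DBG_OPS bound there serves the same purpose.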
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index 70aae47c8bdc..2fdc37c6dda2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" +#include <optional> #include "LiveDebugValues.h" @@ -30,6 +31,7 @@ class InstrRefLDVTest; namespace LiveDebugValues { class MLocTracker; +class DbgOpIDMap; using namespace llvm; @@ -168,6 +170,40 @@ public: static ValueIDNum TombstoneValue; }; +} // End namespace LiveDebugValues + +namespace llvm { +using namespace LiveDebugValues; + +template <> struct DenseMapInfo<LocIdx> { + static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); } + static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); } + + static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); } + + static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; } +}; + +template <> struct DenseMapInfo<ValueIDNum> { + static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; } + static inline ValueIDNum getTombstoneKey() { + return ValueIDNum::TombstoneValue; + } + + static unsigned getHashValue(const ValueIDNum &Val) { + return hash_value(Val.asU64()); + } + + static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) { + return A == B; + } +}; + +} // end namespace llvm + +namespace LiveDebugValues { +using namespace llvm; + /// Type for a table of values in a block. using ValueTable = std::unique_ptr<ValueIDNum[]>; @@ -199,41 +235,219 @@ public: /// the value, and Boolean of whether or not it's indirect. class DbgValueProperties { public: - DbgValueProperties(const DIExpression *DIExpr, bool Indirect) - : DIExpr(DIExpr), Indirect(Indirect) {} + DbgValueProperties(const DIExpression *DIExpr, bool Indirect, bool IsVariadic) + : DIExpr(DIExpr), Indirect(Indirect), IsVariadic(IsVariadic) {} /// Extract properties from an existing DBG_VALUE instruction. DbgValueProperties(const MachineInstr &MI) { assert(MI.isDebugValue()); + assert(MI.getDebugExpression()->getNumLocationOperands() == 0 || + MI.isDebugValueList() || MI.isUndefDebugValue()); + IsVariadic = MI.isDebugValueList(); DIExpr = MI.getDebugExpression(); - Indirect = MI.getOperand(1).isImm(); + Indirect = MI.isDebugOffsetImm(); + } + + bool isJoinable(const DbgValueProperties &Other) const { + return DIExpression::isEqualExpression(DIExpr, Indirect, Other.DIExpr, + Other.Indirect); } bool operator==(const DbgValueProperties &Other) const { - return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect); + return std::tie(DIExpr, Indirect, IsVariadic) == + std::tie(Other.DIExpr, Other.Indirect, Other.IsVariadic); } bool operator!=(const DbgValueProperties &Other) const { return !(*this == Other); } + unsigned getLocationOpCount() const { + return IsVariadic ? DIExpr->getNumLocationOperands() : 1; + } + const DIExpression *DIExpr; bool Indirect; + bool IsVariadic; +}; + +/// TODO: Might pack better if we changed this to a Struct of Arrays, since +/// MachineOperand is width 32, making this struct width 33. 
We could also +/// potentially avoid storing the whole MachineOperand (sizeof=32), instead +/// choosing to store just the contents portion (sizeof=8) and a Kind enum, +/// since we already know it is some type of immediate value. +/// Stores a single debug operand, which can either be a MachineOperand for +/// directly storing immediate values, or a ValueIDNum representing some value +/// computed at some point in the program. IsConst is used as a discriminator. +struct DbgOp { + union { + ValueIDNum ID; + MachineOperand MO; + }; + bool IsConst; + + DbgOp() : ID(ValueIDNum::EmptyValue), IsConst(false) {} + DbgOp(ValueIDNum ID) : ID(ID), IsConst(false) {} + DbgOp(MachineOperand MO) : MO(MO), IsConst(true) {} + + bool isUndef() const { return !IsConst && ID == ValueIDNum::EmptyValue; } + +#ifndef NDEBUG + void dump(const MLocTracker *MTrack) const; +#endif +}; + +/// A DbgOp whose ID (if any) has resolved to an actual location, LocIdx. Used +/// when working with concrete debug values, i.e. when joining MLocs and VLocs +/// in the TransferTracker or emitting DBG_VALUE/DBG_VALUE_LIST instructions in +/// the MLocTracker. +struct ResolvedDbgOp { + union { + LocIdx Loc; + MachineOperand MO; + }; + bool IsConst; + + ResolvedDbgOp(LocIdx Loc) : Loc(Loc), IsConst(false) {} + ResolvedDbgOp(MachineOperand MO) : MO(MO), IsConst(true) {} + + bool operator==(const ResolvedDbgOp &Other) const { + if (IsConst != Other.IsConst) + return false; + if (IsConst) + return MO.isIdenticalTo(Other.MO); + return Loc == Other.Loc; + } + +#ifndef NDEBUG + void dump(const MLocTracker *MTrack) const; +#endif }; -/// Class recording the (high level) _value_ of a variable. Identifies either -/// the value of the variable as a ValueIDNum, or a constant MachineOperand. +/// An ID used in the DbgOpIDMap (below) to lookup a stored DbgOp. This is used +/// in place of actual DbgOps inside of a DbgValue to reduce its size, as +/// DbgValue is very frequently used and passed around, and the actual DbgOp is +/// over 8x larger than this class, due to storing a MachineOperand. This ID +/// should be equal for all equal DbgOps, and also encodes whether the mapped +/// DbgOp is a constant, meaning that for simple equality or const-ness checks +/// it is not necessary to lookup this ID. +struct DbgOpID { + struct IsConstIndexPair { + uint32_t IsConst : 1; + uint32_t Index : 31; + }; + + union { + struct IsConstIndexPair ID; + uint32_t RawID; + }; + + DbgOpID() : RawID(UndefID.RawID) { + static_assert(sizeof(DbgOpID) == 4, "DbgOpID should fit within 4 bytes."); + } + DbgOpID(uint32_t RawID) : RawID(RawID) {} + DbgOpID(bool IsConst, uint32_t Index) : ID({IsConst, Index}) {} + + static DbgOpID UndefID; + + bool operator==(const DbgOpID &Other) const { return RawID == Other.RawID; } + bool operator!=(const DbgOpID &Other) const { return !(*this == Other); } + + uint32_t asU32() const { return RawID; } + + bool isUndef() const { return *this == UndefID; } + bool isConst() const { return ID.IsConst && !isUndef(); } + uint32_t getIndex() const { return ID.Index; } + +#ifndef NDEBUG + void dump(const MLocTracker *MTrack, const DbgOpIDMap *OpStore) const; +#endif +}; + +/// Class storing the complete set of values that are observed by DbgValues +/// within the current function. Allows 2-way lookup, with `find` returning the +/// Op for a given ID and `insert` returning the ID for a given Op (creating one +/// if none exists). 
+class DbgOpIDMap { + + SmallVector<ValueIDNum, 0> ValueOps; + SmallVector<MachineOperand, 0> ConstOps; + + DenseMap<ValueIDNum, DbgOpID> ValueOpToID; + DenseMap<MachineOperand, DbgOpID> ConstOpToID; + +public: + /// If \p Op does not already exist in this map, it is inserted and the + /// corresponding DbgOpID is returned. If Op already exists in this map, then + /// no change is made and the existing ID for Op is returned. + /// Calling this with the undef DbgOp will always return DbgOpID::UndefID. + DbgOpID insert(DbgOp Op) { + if (Op.isUndef()) + return DbgOpID::UndefID; + if (Op.IsConst) + return insertConstOp(Op.MO); + return insertValueOp(Op.ID); + } + /// Returns the DbgOp associated with \p ID. Should only be used for IDs + /// returned from calling `insert` from this map or DbgOpID::UndefID. + DbgOp find(DbgOpID ID) const { + if (ID == DbgOpID::UndefID) + return DbgOp(); + if (ID.isConst()) + return DbgOp(ConstOps[ID.getIndex()]); + return DbgOp(ValueOps[ID.getIndex()]); + } + + void clear() { + ValueOps.clear(); + ConstOps.clear(); + ValueOpToID.clear(); + ConstOpToID.clear(); + } + +private: + DbgOpID insertConstOp(MachineOperand &MO) { + auto ExistingIt = ConstOpToID.find(MO); + if (ExistingIt != ConstOpToID.end()) + return ExistingIt->second; + DbgOpID ID(true, ConstOps.size()); + ConstOpToID.insert(std::make_pair(MO, ID)); + ConstOps.push_back(MO); + return ID; + } + DbgOpID insertValueOp(ValueIDNum VID) { + auto ExistingIt = ValueOpToID.find(VID); + if (ExistingIt != ValueOpToID.end()) + return ExistingIt->second; + DbgOpID ID(false, ValueOps.size()); + ValueOpToID.insert(std::make_pair(VID, ID)); + ValueOps.push_back(VID); + return ID; + } +}; + +// We set the maximum number of operands that we will handle to keep DbgValue +// within a reasonable size (64 bytes), as we store and pass a lot of them +// around. +#define MAX_DBG_OPS 8 + +/// Class recording the (high level) _value_ of a variable. Identifies the value +/// of the variable as a list of ValueIDNums and constant MachineOperands, or as +/// an empty list for undef debug values or VPHI values which we have not found +/// valid locations for. /// This class also stores meta-information about how the value is qualified. /// Used to reason about variable values when performing the second /// (DebugVariable specific) dataflow analysis. class DbgValue { +private: + /// If Kind is Def or VPHI, the set of IDs corresponding to the DbgOps that + /// are used. VPHIs set every ID to EmptyID when we have not found a valid + /// machine-value for every operand, and sets them to the corresponding + /// machine-values when we have found all of them. + DbgOpID DbgOps[MAX_DBG_OPS]; + unsigned OpCount; + public: - /// If Kind is Def, the value number that this value is based on. VPHIs set - /// this field to EmptyValue if there is no machine-value for this VPHI, or - /// the corresponding machine-value if there is one. - ValueIDNum ID; - /// If Kind is Const, the MachineOperand defining this value. - Optional<MachineOperand> MO; /// For a NoVal or VPHI DbgValue, which block it was generated in. int BlockNo; @@ -242,8 +456,8 @@ public: typedef enum { Undef, // Represents a DBG_VALUE $noreg in the transfer function only. - Def, // This value is defined by an inst, or is a PHI value. - Const, // A constant value contained in the MachineOperand field. + Def, // This value is defined by some combination of constants, + // instructions, or PHI values. 
VPHI, // Incoming values to BlockNo differ, those values must be joined by // a PHI in this block. NoVal, // Empty DbgValue indicating an unknown value. Used as initializer, @@ -252,52 +466,113 @@ public: /// Discriminator for whether this is a constant or an in-program value. KindT Kind; - DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind) - : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) { - assert(Kind == Def); + DbgValue(ArrayRef<DbgOpID> DbgOps, const DbgValueProperties &Prop) + : OpCount(DbgOps.size()), BlockNo(0), Properties(Prop), Kind(Def) { + static_assert(sizeof(DbgValue) <= 64, + "DbgValue should fit within 64 bytes."); + assert(DbgOps.size() == Prop.getLocationOpCount()); + if (DbgOps.size() > MAX_DBG_OPS || + any_of(DbgOps, [](DbgOpID ID) { return ID.isUndef(); })) { + Kind = Undef; + OpCount = 0; +#define DEBUG_TYPE "LiveDebugValues" + if (DbgOps.size() > MAX_DBG_OPS) { + LLVM_DEBUG(dbgs() << "Found DbgValue with more than maximum allowed " + "operands.\n"); + } +#undef DEBUG_TYPE + } else { + for (unsigned Idx = 0; Idx < DbgOps.size(); ++Idx) + this->DbgOps[Idx] = DbgOps[Idx]; + } } DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind) - : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo), - Properties(Prop), Kind(Kind) { + : OpCount(0), BlockNo(BlockNo), Properties(Prop), Kind(Kind) { assert(Kind == NoVal || Kind == VPHI); } - DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind) - : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop), - Kind(Kind) { - assert(Kind == Const); - } - DbgValue(const DbgValueProperties &Prop, KindT Kind) - : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop), - Kind(Kind) { + : OpCount(0), BlockNo(0), Properties(Prop), Kind(Kind) { assert(Kind == Undef && "Empty DbgValue constructor must pass in Undef kind"); } #ifndef NDEBUG - void dump(const MLocTracker *MTrack) const; + void dump(const MLocTracker *MTrack = nullptr, + const DbgOpIDMap *OpStore = nullptr) const; #endif bool operator==(const DbgValue &Other) const { if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties)) return false; - else if (Kind == Def && ID != Other.ID) + else if (Kind == Def && !equal(getDbgOpIDs(), Other.getDbgOpIDs())) return false; else if (Kind == NoVal && BlockNo != Other.BlockNo) return false; - else if (Kind == Const) - return MO->isIdenticalTo(*Other.MO); else if (Kind == VPHI && BlockNo != Other.BlockNo) return false; - else if (Kind == VPHI && ID != Other.ID) + else if (Kind == VPHI && !equal(getDbgOpIDs(), Other.getDbgOpIDs())) return false; return true; } bool operator!=(const DbgValue &Other) const { return !(*this == Other); } + + // Returns an array of all the machine values used to calculate this variable + // value, or an empty list for an Undef or unjoined VPHI. + ArrayRef<DbgOpID> getDbgOpIDs() const { return {DbgOps, OpCount}; } + + // Returns either DbgOps[Index] if this DbgValue has Debug Operands, or + // the ID for ValueIDNum::EmptyValue otherwise (i.e. if this is an Undef, + // NoVal, or an unjoined VPHI). + DbgOpID getDbgOpID(unsigned Index) const { + if (!OpCount) + return DbgOpID::UndefID; + assert(Index < OpCount); + return DbgOps[Index]; + } + // Replaces this DbgValue's existing DbgOpIDs (if any) with the contents of + // \p NewIDs. The number of DbgOpIDs passed must be equal to the number of + // arguments expected by this DbgValue's properties (the return value of + // `getLocationOpCount()`). 
+ void setDbgOpIDs(ArrayRef<DbgOpID> NewIDs) { + // We can go from no ops to some ops, but not from some ops to no ops. + assert(NewIDs.size() == getLocationOpCount() && + "Incorrect number of Debug Operands for this DbgValue."); + OpCount = NewIDs.size(); + for (unsigned Idx = 0; Idx < NewIDs.size(); ++Idx) + DbgOps[Idx] = NewIDs[Idx]; + } + + // The number of debug operands expected by this DbgValue's expression. + // getDbgOpIDs() should return an array of this length, unless this is an + // Undef or an unjoined VPHI. + unsigned getLocationOpCount() const { + return Properties.getLocationOpCount(); + } + + // Returns true if this or Other are unjoined PHIs, which do not have defined + // Loc Ops, or if the `n`th Loc Op for this has a different constness to the + // `n`th Loc Op for Other. + bool hasJoinableLocOps(const DbgValue &Other) const { + if (isUnjoinedPHI() || Other.isUnjoinedPHI()) + return true; + for (unsigned Idx = 0; Idx < getLocationOpCount(); ++Idx) { + if (getDbgOpID(Idx).isConst() != Other.getDbgOpID(Idx).isConst()) + return false; + } + return true; + } + + bool isUnjoinedPHI() const { return Kind == VPHI && OpCount == 0; } + + bool hasIdenticalValidLocOps(const DbgValue &Other) const { + if (!OpCount) + return false; + return equal(getDbgOpIDs(), Other.getDbgOpIDs()); + } }; class LocIdxToIndexFunctor { @@ -620,9 +895,9 @@ public: void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID); /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked. - /// Returns None when in scenarios where a spill slot could be tracked, but - /// we would likely run into resource limitations. - Optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L); + /// Returns std::nullopt when in scenarios where a spill slot could be + /// tracked, but we would likely run into resource limitations. + std::optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L); // Get LocIdx of a spill ID. LocIdx getSpillMLoc(unsigned SpillID) { @@ -667,10 +942,11 @@ public: LLVM_DUMP_METHOD void dump_mloc_map(); #endif - /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the + /// Create a DBG_VALUE based on debug operands \p DbgOps. Qualify it with the /// information in \pProperties, for variable Var. Don't insert it anywhere, /// just return the builder for it. - MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var, + MachineInstrBuilder emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps, + const DebugVariable &Var, const DbgValueProperties &Properties); }; @@ -704,32 +980,16 @@ public: public: VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr) - : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {} + : OverlappingFragments(O), EmptyProperties(EmptyExpr, false, false) {} void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, - Optional<ValueIDNum> ID) { - assert(MI.isDebugValue() || MI.isDebugRef()); + const SmallVectorImpl<DbgOpID> &DebugOps) { + assert(MI.isDebugValueLike()); DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); - DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def) - : DbgValue(Properties, DbgValue::Undef); - - // Attempt insertion; overwrite if it's already mapped. 
- auto Result = Vars.insert(std::make_pair(Var, Rec)); - if (!Result.second) - Result.first->second = Rec; - Scopes[Var] = MI.getDebugLoc().get(); - - considerOverlaps(Var, MI.getDebugLoc().get()); - } - - void defVar(const MachineInstr &MI, const MachineOperand &MO) { - // Only DBG_VALUEs can define constant-valued variables. - assert(MI.isDebugValue()); - DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), - MI.getDebugLoc()->getInlinedAt()); - DbgValueProperties Properties(MI); - DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const); + DbgValue Rec = (DebugOps.size() > 0) + ? DbgValue(DebugOps, Properties) + : DbgValue(Properties, DbgValue::Undef); // Attempt insertion; overwrite if it's already mapped. auto Result = Vars.insert(std::make_pair(Var, Rec)); @@ -751,9 +1011,9 @@ public: // The "empty" fragment is stored as DebugVariable::DefaultFragment, so // that it overlaps with everything, however its cannonical representation // in a DebugVariable is as "None". - Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo; + std::optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo; if (DebugVariable::isDefaultFragment(FragmentInfo)) - OptFragmentInfo = None; + OptFragmentInfo = std::nullopt; DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo, Var.getInlinedAt()); @@ -779,7 +1039,7 @@ public: friend class ::InstrRefLDVTest; using FragmentInfo = DIExpression::FragmentInfo; - using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; + using OptFragmentInfo = std::optional<DIExpression::FragmentInfo>; // Helper while building OverlapMap, a map of all fragments seen for a given // DILocalVariable. @@ -872,12 +1132,12 @@ private: uint64_t InstrNum; /// Block where DBG_PHI occurred. MachineBasicBlock *MBB; - /// The value number read by the DBG_PHI -- or None if it didn't refer to - /// a value. - Optional<ValueIDNum> ValueRead; - /// Register/Stack location the DBG_PHI reads -- or None if it referred to - /// something unexpected. - Optional<LocIdx> ReadLoc; + /// The value number read by the DBG_PHI -- or std::nullopt if it didn't + /// refer to a value. + std::optional<ValueIDNum> ValueRead; + /// Register/Stack location the DBG_PHI reads -- or std::nullopt if it + /// referred to something unexpected. + std::optional<LocIdx> ReadLoc; operator unsigned() const { return InstrNum; } }; @@ -896,7 +1156,10 @@ private: /// DBG_INSTR_REFs that call resolveDbgPHIs. These variable references solve /// a mini SSA problem caused by DBG_PHIs being cloned, this collection caches /// the result. - DenseMap<MachineInstr *, Optional<ValueIDNum>> SeenDbgPHIs; + DenseMap<std::pair<MachineInstr *, unsigned>, std::optional<ValueIDNum>> + SeenDbgPHIs; + + DbgOpIDMap DbgOpStore; /// True if we need to examine call instructions for stack clobbers. We /// normally assume that they don't clobber SP, but stack probes on Windows @@ -909,8 +1172,8 @@ private: StringRef StackProbeSymbolName; /// Tests whether this instruction is a spill to a stack slot. - Optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI, - MachineFunction *MF); + std::optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI, + MachineFunction *MF); /// Decide if @MI is a spill instruction and return true if it is. We use 2 /// criteria to make this decision: @@ -923,14 +1186,23 @@ private: /// If a given instruction is identified as a spill, return the spill slot /// and set \p Reg to the spilled register. 
- Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg); + std::optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI, + MachineFunction *MF, + unsigned &Reg); /// Given a spill instruction, extract the spill slot information, ensure it's /// tracked, and return the spill number. - Optional<SpillLocationNo> + std::optional<SpillLocationNo> extractSpillBaseRegAndOffset(const MachineInstr &MI); + /// For an instruction reference given by \p InstNo and \p OpNo in instruction + /// \p MI returns the Value pointed to by that instruction reference if any + /// exists, otherwise returns None. + std::optional<ValueIDNum> getValueForInstrRef(unsigned InstNo, unsigned OpNo, + MachineInstr &MI, + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns); + /// Observe a single instruction while stepping through a block. void process(MachineInstr &MI, const ValueTable *MLiveOuts, const ValueTable *MLiveIns); @@ -972,17 +1244,18 @@ private: /// forming another mini-ssa problem to solve. /// \p Here the position of a DBG_INSTR_REF seeking a machine value number /// \p InstrNum Debug instruction number defined by DBG_PHI instructions. - /// \returns The machine value number at position Here, or None. - Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF, - const ValueTable *MLiveOuts, - const ValueTable *MLiveIns, - MachineInstr &Here, uint64_t InstrNum); - - Optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF, - const ValueTable *MLiveOuts, - const ValueTable *MLiveIns, - MachineInstr &Here, - uint64_t InstrNum); + /// \returns The machine value number at position Here, or std::nullopt. + std::optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF, + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, + MachineInstr &Here, + uint64_t InstrNum); + + std::optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF, + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, + MachineInstr &Here, + uint64_t InstrNum); /// Step through the function, recording register definitions and movements /// in an MLocTracker. Convert the observations into a per-block transfer @@ -1086,14 +1359,21 @@ private: SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, DbgValue &LiveIn); - /// For the given block and live-outs feeding into it, try to find a - /// machine location where all the variable values join together. - /// \returns Value ID of a machine PHI if an appropriate one is available. - Optional<ValueIDNum> - pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var, + /// For the given block and live-outs feeding into it, try to find + /// machine locations for each debug operand where all the values feeding + /// into that operand join together. + /// \returns true if a joined location was found for every value that needed + /// to be joined. + bool + pickVPHILoc(SmallVectorImpl<DbgOpID> &OutValues, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs, const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders); + std::optional<ValueIDNum> pickOperandPHILoc( + unsigned DbgOpIdx, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts, + FuncValueTable &MOutLocs, + const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders); + /// Take collections of DBG_VALUE instructions stored in TTracker, and /// install them into their output blocks. 
Preserves a stable order of /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through @@ -1138,6 +1418,7 @@ public: void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const; bool isCalleeSaved(LocIdx L) const; +  bool isCalleeSavedReg(Register R) const; bool hasFoldedStackStore(const MachineInstr &MI) { // Instruction must have a memory operand that's a stack slot, and isn't @@ -1152,38 +1433,9 @@ public: && !MemOperand->getPseudoValue()->isAliased(MFI); } -  Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI); +  std::optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI); }; } // namespace LiveDebugValues -namespace llvm { -using namespace LiveDebugValues; - -template <> struct DenseMapInfo<LocIdx> { -  static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); } -  static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); } - -  static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); } - -  static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; } -}; - -template <> struct DenseMapInfo<ValueIDNum> { -  static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; } -  static inline ValueIDNum getTombstoneKey() { -    return ValueIDNum::TombstoneValue; -  } - -  static unsigned getHashValue(const ValueIDNum &Val) { -    return hash_value(Val.asU64()); -  } - -  static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) { -    return A == B; -  } -}; - -} // end namespace llvm - #endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */ diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 141008ac2296..9dba9a88f703 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -18,6 +18,7 @@ #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" /// \file LiveDebugValues.cpp /// @@ -72,11 +73,6 @@ public: /// Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; -  MachineFunctionProperties getRequiredProperties() const override { -    return MachineFunctionProperties().set( -        MachineFunctionProperties::Property::NoVRegs); -  } - void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); @@ -106,6 +102,14 @@ LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { +  // Except for Wasm, all targets should only be using physical registers at this +  // point. Wasm only uses virtual registers throughout its pipeline, but its +  // virtual registers don't participate in this LiveDebugValues analysis; only +  // its target indices do. +  assert(MF.getTarget().getTargetTriple().isWasm() || +         MF.getProperties().hasProperty( +             MachineFunctionProperties::Property::NoVRegs)); + bool InstrRefBased = MF.useDebugInstrRef(); // Allow the user to force selection of InstrRef LDV. 
InstrRefBased |= ForceInstrRefLDV; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 32e07eb77efe..b78757b855f4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -10,12 +10,13 @@ /// /// LiveDebugValues is an optimistic "available expressions" dataflow /// algorithm. The set of expressions is the set of machine locations -/// (registers, spill slots, constants) that a variable fragment might be -/// located, qualified by a DIExpression and indirect-ness flag, while each -/// variable is identified by a DebugVariable object. The availability of an -/// expression begins when a DBG_VALUE instruction specifies the location of a -/// DebugVariable, and continues until that location is clobbered or -/// re-specified by a different DBG_VALUE for the same DebugVariable. +/// (registers, spill slots, constants, and target indices) that a variable +/// fragment might be located, qualified by a DIExpression and indirect-ness +/// flag, while each variable is identified by a DebugVariable object. The +/// availability of an expression begins when a DBG_VALUE instruction specifies +/// the location of a DebugVariable, and continues until that location is +/// clobbered or re-specified by a different DBG_VALUE for the same +/// DebugVariable. /// /// The output of LiveDebugValues is additional DBG_VALUE instructions, /// placed to extend variable locations as far they're available. This file @@ -148,6 +149,7 @@ #include <cstdint> #include <functional> #include <map> +#include <optional> #include <queue> #include <tuple> #include <utility> @@ -229,6 +231,14 @@ struct LocIndex { static constexpr u32_location_t kEntryValueBackupLocation = kFirstInvalidRegLocation + 1; + /// A special location reserved for VarLocs with locations of kind + /// WasmLocKind. + /// TODO Placing all Wasm target index locations in this single kWasmLocation + /// may cause slowdown in compilation time in very large functions. Consider + /// giving a each target index/offset pair its own u32_location_t if this + /// becomes a problem. + static constexpr u32_location_t kWasmLocation = kFirstInvalidRegLocation + 2; + LocIndex(u32_location_t Location, u32_index_t Index) : Location(Location), Index(Index) {} @@ -237,8 +247,7 @@ struct LocIndex { } template<typename IntT> static LocIndex fromRawInteger(IntT ID) { - static_assert(std::is_unsigned<IntT>::value && - sizeof(ID) == sizeof(uint64_t), + static_assert(std::is_unsigned_v<IntT> && sizeof(ID) == sizeof(uint64_t), "Cannot convert raw integer to LocIndex"); return {static_cast<u32_location_t>(ID >> 32), static_cast<u32_index_t>(ID)}; @@ -282,7 +291,7 @@ private: enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore }; using FragmentInfo = DIExpression::FragmentInfo; - using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; + using OptFragmentInfo = std::optional<DIExpression::FragmentInfo>; /// A pair of debug variable and value location. struct VarLoc { @@ -299,6 +308,21 @@ private: } }; + // Target indices used for wasm-specific locations. + struct WasmLoc { + // One of TargetIndex values defined in WebAssembly.h. We deal with + // local-related TargetIndex in this analysis (TI_LOCAL and + // TI_LOCAL_INDIRECT). 
Stack operands (TI_OPERAND_STACK) will be handled + // separately WebAssemblyDebugFixup pass, and we don't associate debug + // info with values in global operands (TI_GLOBAL_RELOC) at the moment. + int Index; + int64_t Offset; + bool operator==(const WasmLoc &Other) const { + return Index == Other.Index && Offset == Other.Offset; + } + bool operator!=(const WasmLoc &Other) const { return !(*this == Other); } + }; + /// Identity of the variable at this location. const DebugVariable Var; @@ -313,7 +337,8 @@ private: InvalidKind = 0, RegisterKind, SpillLocKind, - ImmediateKind + ImmediateKind, + WasmLocKind }; enum class EntryValueLocKind { @@ -332,6 +357,7 @@ private: int64_t Immediate; const ConstantFP *FPImm; const ConstantInt *CImm; + WasmLoc WasmLocation; MachineLocValue() : Hash(0) {} }; @@ -348,6 +374,8 @@ private: switch (Kind) { case MachineLocKind::SpillLocKind: return Value.SpillLocation == Other.Value.SpillLocation; + case MachineLocKind::WasmLocKind: + return Value.WasmLocation == Other.Value.WasmLocation; case MachineLocKind::RegisterKind: case MachineLocKind::ImmediateKind: return Value.Hash == Other.Value.Hash; @@ -366,6 +394,11 @@ private: Other.Kind, Other.Value.SpillLocation.SpillBase, Other.Value.SpillLocation.SpillOffset.getFixed(), Other.Value.SpillLocation.SpillOffset.getScalable()); + case MachineLocKind::WasmLocKind: + return std::make_tuple(Kind, Value.WasmLocation.Index, + Value.WasmLocation.Offset) < + std::make_tuple(Other.Kind, Other.Value.WasmLocation.Index, + Other.Value.WasmLocation.Offset); case MachineLocKind::RegisterKind: case MachineLocKind::ImmediateKind: return std::tie(Kind, Value.Hash) < @@ -386,7 +419,7 @@ private: /// emitting a debug value. SmallVector<unsigned, 8> OrigLocMap; - VarLoc(const MachineInstr &MI, LexicalScopes &LS) + VarLoc(const MachineInstr &MI) : Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()), Expr(MI.getDebugExpression()), MI(MI) { @@ -429,6 +462,9 @@ private: } else if (Op.isCImm()) { Kind = MachineLocKind::ImmediateKind; Loc.CImm = Op.getCImm(); + } else if (Op.isTargetIndex()) { + Kind = MachineLocKind::WasmLocKind; + Loc.WasmLocation = {Op.getIndex(), Op.getOffset()}; } else llvm_unreachable("Invalid Op kind for MachineLoc."); return {Kind, Loc}; @@ -436,9 +472,9 @@ private: /// Take the variable and machine-location in DBG_VALUE MI, and build an /// entry location using the given expression. - static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, + static VarLoc CreateEntryLoc(const MachineInstr &MI, const DIExpression *EntryExpr, Register Reg) { - VarLoc VL(MI, LS); + VarLoc VL(MI); assert(VL.Locs.size() == 1 && VL.Locs[0].Kind == MachineLocKind::RegisterKind); VL.EVKind = EntryValueLocKind::EntryValueKind; @@ -452,9 +488,8 @@ private: /// location will turn into the normal location if the backup is valid at /// the time of the primary location clobbering. static VarLoc CreateEntryBackupLoc(const MachineInstr &MI, - LexicalScopes &LS, const DIExpression *EntryExpr) { - VarLoc VL(MI, LS); + VarLoc VL(MI); assert(VL.Locs.size() == 1 && VL.Locs[0].Kind == MachineLocKind::RegisterKind); VL.EVKind = EntryValueLocKind::EntryValueBackupKind; @@ -466,10 +501,9 @@ private: /// function entry), and build a copy of an entry value backup location by /// setting the register location to NewReg. 
static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI, - LexicalScopes &LS, const DIExpression *EntryExpr, Register NewReg) { - VarLoc VL(MI, LS); + VarLoc VL(MI); assert(VL.Locs.size() == 1 && VL.Locs[0].Kind == MachineLocKind::RegisterKind); VL.EVKind = EntryValueLocKind::EntryValueCopyBackupKind; @@ -564,6 +598,10 @@ private: MOs.push_back(Orig); break; } + case MachineLocKind::WasmLocKind: { + MOs.push_back(Orig); + break; + } case MachineLocKind::InvalidKind: llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); } @@ -649,6 +687,21 @@ private: llvm_unreachable("Could not find given SpillLoc in Locs"); } + bool containsWasmLocs() const { + return any_of(Locs, [](VarLoc::MachineLoc ML) { + return ML.Kind == VarLoc::MachineLocKind::WasmLocKind; + }); + } + + /// If this variable is described in whole or part by \p WasmLocation, + /// return true. + bool usesWasmLoc(WasmLoc WasmLocation) const { + MachineLoc WasmML; + WasmML.Kind = MachineLocKind::WasmLocKind; + WasmML.Value.WasmLocation = WasmLocation; + return is_contained(Locs, WasmML); + } + /// Determine whether the lexical scope of this value's debug location /// dominates MBB. bool dominates(LexicalScopes &LS, MachineBasicBlock &MBB) const { @@ -656,8 +709,9 @@ private: } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - // TRI can be null. - void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const { + // TRI and TII can be null. + void dump(const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, + raw_ostream &Out = dbgs()) const { Out << "VarLoc("; for (const MachineLoc &MLoc : Locs) { if (Locs.begin() != &MLoc) @@ -676,6 +730,22 @@ private: case MachineLocKind::ImmediateKind: Out << MLoc.Value.Immediate; break; + case MachineLocKind::WasmLocKind: { + if (TII) { + auto Indices = TII->getSerializableTargetIndices(); + auto Found = + find_if(Indices, [&](const std::pair<int, const char *> &I) { + return I.first == MLoc.Value.WasmLocation.Index; + }); + assert(Found != Indices.end()); + Out << Found->second; + if (MLoc.Value.WasmLocation.Offset > 0) + Out << " + " << MLoc.Value.WasmLocation.Offset; + } else { + Out << "WasmLoc"; + } + break; + } case MachineLocKind::InvalidKind: llvm_unreachable("Invalid VarLoc in dump method"); } @@ -743,10 +813,10 @@ private: return RegNo < LocIndex::kFirstInvalidRegLocation; }) && "Physreg out of range?"); - if (VL.containsSpillLocs()) { - LocIndex::u32_location_t Loc = LocIndex::kSpillLocation; - Locations.push_back(Loc); - } + if (VL.containsSpillLocs()) + Locations.push_back(LocIndex::kSpillLocation); + if (VL.containsWasmLocs()) + Locations.push_back(LocIndex::kWasmLocation); } else if (VL.EVKind != VarLoc::EntryValueLocKind::EntryValueKind) { LocIndex::u32_location_t Loc = LocIndex::kEntryValueBackupLocation; Locations.push_back(Loc); @@ -859,7 +929,7 @@ private: /// Insert a set of ranges. void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map); - llvm::Optional<LocIndices> getEntryValueBackup(DebugVariable Var); + std::optional<LocIndices> getEntryValueBackup(DebugVariable Var); /// Empty the set. void clear() { @@ -899,6 +969,12 @@ private: return LocIndex::indexRangeForLocation( getVarLocs(), LocIndex::kEntryValueBackupLocation); } + + /// Get all set IDs for VarLocs with MLs of kind WasmLocKind. 
+ auto getWasmVarLocs() const { + return LocIndex::indexRangeForLocation(getVarLocs(), + LocIndex::kWasmLocation); + } }; /// Collect all VarLoc IDs from \p CollectFrom for VarLocs with MLs of kind @@ -946,9 +1022,9 @@ private: /// If a given instruction is identified as a spill, return the spill location /// and set \p Reg to the spilled register. - Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, - MachineFunction *MF, - Register &Reg); + std::optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, + MachineFunction *MF, + Register &Reg); /// Given a spill instruction, extract the register and offset used to /// address the spill location in a target independent way. VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); @@ -985,6 +1061,8 @@ private: VarLocMap &VarLocIDs, InstToEntryLocMap &EntryValTransfers, RegDefToInstMap &RegSetInstrs); + void transferWasmDef(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs); bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); @@ -1110,13 +1188,13 @@ void VarLocBasedLDV::OpenRangesSet::insert(LocIndices VarLocIDs, /// Return the Loc ID of an entry value backup location, if it exists for the /// variable. -llvm::Optional<LocIndices> +std::optional<LocIndices> VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { auto It = EntryValuesBackupVars.find(Var); if (It != EntryValuesBackupVars.end()) return It->second; - return llvm::None; + return std::nullopt; } void VarLocBasedLDV::collectIDsForRegs(VarLocsInRange &Collected, @@ -1203,7 +1281,7 @@ void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF, for (const VarLoc &VL : VarLocs) { Out << " Var: " << VL.Var.getVariable()->getName(); Out << " MI: "; - VL.dump(TRI, Out); + VL.dump(TRI, TII, Out); } } Out << "\n"; @@ -1341,10 +1419,10 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, if (all_of(MI.debug_operands(), [](const MachineOperand &MO) { return (MO.isReg() && MO.getReg()) || MO.isImm() || MO.isFPImm() || - MO.isCImm(); + MO.isCImm() || MO.isTargetIndex(); })) { // Use normal VarLoc constructor for registers and immediates. - VarLoc VL(MI, LS); + VarLoc VL(MI); // End all previous ranges of VL.Var. OpenRanges.erase(VL); @@ -1357,7 +1435,7 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, // This must be an undefined location. If it has an open range, erase it. 
     assert(MI.isUndefDebugValue() &&
            "Unexpected non-undef DBG_VALUE encountered");
-    VarLoc VL(MI, LS);
+    VarLoc VL(MI);
     OpenRanges.erase(VL);
   }
 }
@@ -1398,7 +1476,7 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
       continue;
 
     auto DebugVar = VL.Var;
-    Optional<LocIndices> EntryValBackupIDs =
+    std::optional<LocIndices> EntryValBackupIDs =
         OpenRanges.getEntryValueBackup(DebugVar);
 
     // If the parameter has the entry value backup, it means we should
@@ -1407,7 +1485,7 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
       continue;
 
     const VarLoc &EntryVL = VarLocIDs[EntryValBackupIDs->back()];
-    VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,
+    VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, EntryVL.Expr,
                                              EntryVL.Locs[0].Value.RegNo);
     LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
     assert(EntryValueIDs.size() == 1 &&
@@ -1454,7 +1532,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
     ProcessVarLoc(VL);
     LLVM_DEBUG({
       dbgs() << "Creating VarLoc for register copy:";
-      VL.dump(TRI);
+      VL.dump(TRI, TII);
     });
     return;
   }
@@ -1467,7 +1545,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
     ProcessVarLoc(VL);
     LLVM_DEBUG({
       dbgs() << "Creating VarLoc for spill:";
-      VL.dump(TRI);
+      VL.dump(TRI, TII);
     });
     return;
   }
@@ -1480,7 +1558,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
     ProcessVarLoc(VL);
     LLVM_DEBUG({
       dbgs() << "Creating VarLoc for restore:";
-      VL.dump(TRI);
+      VL.dump(TRI, TII);
     });
     return;
   }
@@ -1509,8 +1587,7 @@ void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
   SmallVector<const uint32_t *, 4> RegMasks;
   for (const MachineOperand &MO : MI.operands()) {
     // Determine whether the operand is a register def.
-    if (MO.isReg() && MO.isDef() && MO.getReg() &&
-        Register::isPhysicalRegister(MO.getReg()) &&
+    if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical() &&
         !(MI.isCall() && MO.getReg() == SP)) {
       // Remove ranges of all aliased registers.
       for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
@@ -1565,6 +1642,30 @@ void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
   }
 }
 
+void VarLocBasedLDV::transferWasmDef(MachineInstr &MI,
+                                     OpenRangesSet &OpenRanges,
+                                     VarLocMap &VarLocIDs) {
+  // If this is not a Wasm local.set or local.tee, which sets local values,
+  // return.
+  int Index;
+  int64_t Offset;
+  if (!TII->isExplicitTargetIndexDef(MI, Index, Offset))
+    return;
+
+  // Find the target indices killed by MI, and delete those variable locations
+  // from the open range.
+  VarLocsInRange KillSet;
+  VarLoc::WasmLoc Loc{Index, Offset};
+  for (uint64_t ID : OpenRanges.getWasmVarLocs()) {
+    LocIndex Idx = LocIndex::fromRawInteger(ID);
+    const VarLoc &VL = VarLocIDs[Idx];
+    assert(VL.containsWasmLocs() && "Broken VarLocSet?");
+    if (VL.usesWasmLoc(Loc))
+      KillSet.insert(ID);
+  }
+  OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kWasmLocation);
+}
+
 bool VarLocBasedLDV::isSpillInstruction(const MachineInstr &MI,
                                         MachineFunction *MF) {
   // TODO: Handle multiple stores folded into one.
@@ -1618,11 +1719,11 @@ bool VarLocBasedLDV::isLocationSpill(const MachineInstr &MI,
   return false;
 }
 
-Optional<VarLocBasedLDV::VarLoc::SpillLoc>
+std::optional<VarLocBasedLDV::VarLoc::SpillLoc>
 VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
-                                     MachineFunction *MF, Register &Reg) {
+                                     MachineFunction *MF, Register &Reg) {
   if (!MI.hasOneMemOperand())
-    return None;
+    return std::nullopt;
 
   // FIXME: Handle folded restore instructions with more than one memory
   // operand.
@@ -1630,7 +1731,7 @@ VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
     Reg = MI.getOperand(0).getReg();
     return extractSpillBaseRegAndOffset(MI);
   }
-  return None;
+  return std::nullopt;
 }
 
 /// A spilled register may indicate that we have to end the current range of
@@ -1647,7 +1748,7 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
   MachineFunction *MF = MI.getMF();
   TransferKind TKind;
   Register Reg;
-  Optional<VarLoc::SpillLoc> Loc;
+  std::optional<VarLoc::SpillLoc> Loc;
 
   LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
 
@@ -1777,7 +1878,7 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
       if (VL.isEntryValueBackupReg(SrcReg)) {
         LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
         VarLoc EntryValLocCopyBackup =
-            VarLoc::CreateEntryCopyBackupLoc(VL.MI, LS, VL.Expr, DestReg);
+            VarLoc::CreateEntryCopyBackupLoc(VL.MI, VL.Expr, DestReg);
         // Stop tracking the original entry value.
         OpenRanges.erase(VL);
 
@@ -1818,7 +1919,7 @@ bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB,
     for (VarLoc &VL : VarLocs) {
       // Copy OpenRanges to OutLocs, if not already present.
       dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
-      VL.dump(TRI);
+      VL.dump(TRI, TII);
     }
   });
   VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs);
@@ -1903,6 +2004,7 @@ void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
                      RegSetInstrs);
   transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers,
                       RegSetInstrs);
+  transferWasmDef(MI, OpenRanges, VarLocIDs);
   transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
   transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
 }
@@ -2058,10 +2160,13 @@ bool VarLocBasedLDV::isEntryValueCandidate(
 /// Collect all register defines (including aliases) for the given instruction.
 static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
                            const TargetRegisterInfo *TRI) {
-  for (const MachineOperand &MO : MI.operands())
-    if (MO.isReg() && MO.isDef() && MO.getReg())
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical()) {
+      Regs.insert(MO.getReg());
       for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
         Regs.insert(*AI);
+    }
+  }
 }
 
 /// This routine records the entry values of function parameters. The values
@@ -2090,7 +2195,7 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
   // valid. It is valid until a parameter is not changed.
   DIExpression *NewExpr =
       DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
-  VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
+  VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, NewExpr);
   LocIndices EntryValLocIDs = VarLocIDs.insert(EntryValLocAsBackup);
   OpenRanges.insert(EntryValLocIDs, EntryValLocAsBackup);
 }
@@ -2102,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
                                   TargetPassConfig *TPC, unsigned InputBBLimit,
                                   unsigned InputDbgValLimit) {
   (void)DomTree;
-  LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+  LLVM_DEBUG(dbgs() << "\nDebug Range Extension: " << MF.getName() << "\n");
 
   if (!MF.getFunction().getSubprogram())
     // VarLocBaseLDV will already have removed all DBG_VALUEs.
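Two mechanical migrations recur throughout the hunks above and in the files that follow: llvm::Optional/llvm::None give way to std::optional/std::nullopt, and the static Register::isVirtualRegister/isPhysicalRegister helpers give way to the Register::isVirtual()/isPhysical() members. As a hedged aside, the standalone sketch below illustrates only the std::optional idiom; it is not part of this commit, and the names (findSpillSlot, HasSlot) are invented for illustration.

    // Standalone illustration (not LLVM code) of the Optional -> std::optional
    // pattern; requires C++17.
    #include <iostream>
    #include <optional>

    // Hypothetical helper: returns a slot number only when one is available.
    static std::optional<unsigned> findSpillSlot(bool HasSlot) {
      if (!HasSlot)
        return std::nullopt; // previously: return None;
      return 42u;            // the value converts into an engaged optional
    }

    int main() {
      if (std::optional<unsigned> Slot = findSpillSlot(true))
        std::cout << "slot " << *Slot << "\n"; // dereference only when engaged
      if (!findSpillSlot(false))
        std::cout << "no slot\n";
      return 0;
    }
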
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 574c0f98161e..9603c1f01e08 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -59,6 +59,7 @@
 #include <cassert>
 #include <iterator>
 #include <memory>
+#include <optional>
 #include <utility>
 
 using namespace llvm;
@@ -137,8 +138,7 @@ public:
     // Turn this into an undef debug value list; right now, the simplest form
     // of this is an expression with one arg, and an undef debug operand.
     Expression =
-        DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0,
-                                              dwarf::DW_OP_stack_value});
+        DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0});
     if (auto FragmentInfoOpt = Expr.getFragmentInfo())
       Expression = *DIExpression::createFragmentExpression(
           Expression, FragmentInfoOpt->OffsetInBits,
@@ -286,7 +286,7 @@ class LDVImpl;
 class UserValue {
   const DILocalVariable *Variable; ///< The debug info variable we are part of.
   /// The part of the variable we describe.
-  const Optional<DIExpression::FragmentInfo> Fragment;
+  const std::optional<DIExpression::FragmentInfo> Fragment;
   DebugLoc dl;       ///< The debug location for the variable. This is
                      ///< used by dwarf writer to find lexical scope.
   UserValue *leader; ///< Equivalence class leader.
@@ -319,7 +319,7 @@ class UserValue {
 public:
   /// Create a new UserValue.
   UserValue(const DILocalVariable *var,
-            Optional<DIExpression::FragmentInfo> Fragment, DebugLoc L,
+            std::optional<DIExpression::FragmentInfo> Fragment, DebugLoc L,
             LocMap::Allocator &alloc)
       : Variable(var), Fragment(Fragment), dl(std::move(L)), leader(this),
         locInts(alloc) {}
@@ -440,11 +440,12 @@ public:
   /// VNInfo.
   /// \param [out] Kills Append end points of VNI's live range to Kills.
   /// \param LIS Live intervals analysis.
-  void extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
-                 SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
-                     &LiveIntervalInfo,
-                 Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
-                 LiveIntervals &LIS);
+  void
+  extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
+            SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+                &LiveIntervalInfo,
+            std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+            LiveIntervals &LIS);
 
   /// The value in LI may be copies to other registers. Determine if
   /// any of the copies are available at the kill points, and add defs if
@@ -582,7 +583,7 @@ class LDVImpl {
 
   /// Find or create a UserValue.
   UserValue *getUserValue(const DILocalVariable *Var,
-                          Optional<DIExpression::FragmentInfo> Fragment,
+                          std::optional<DIExpression::FragmentInfo> Fragment,
                           const DebugLoc &DL);
 
   /// Find the EC leader for VirtReg or null.
@@ -763,14 +764,14 @@ void LDVImpl::print(raw_ostream &OS) {
 
 void UserValue::mapVirtRegs(LDVImpl *LDV) {
   for (unsigned i = 0, e = locations.size(); i != e; ++i)
-    if (locations[i].isReg() &&
-        Register::isVirtualRegister(locations[i].getReg()))
+    if (locations[i].isReg() && locations[i].getReg().isVirtual())
       LDV->mapVirtReg(locations[i].getReg(), this);
 }
 
-UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
-                                 Optional<DIExpression::FragmentInfo> Fragment,
-                                 const DebugLoc &DL) {
+UserValue *
+LDVImpl::getUserValue(const DILocalVariable *Var,
+                      std::optional<DIExpression::FragmentInfo> Fragment,
+                      const DebugLoc &DL) {
   // FIXME: Handle partially overlapping fragments. See
   // https://reviews.llvm.org/D70121#1849741.
   DebugVariable ID(Var, Fragment, DL->getInlinedAt());
@@ -784,7 +785,7 @@ UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
 }
 
 void LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) {
-  assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+  assert(VirtReg.isVirtual() && "Only map VirtRegs");
   UserValue *&Leader = virtRegToEqClass[VirtReg];
   Leader = UserValue::merge(Leader, EC);
 }
@@ -820,7 +821,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
   // will be incorrect.
   bool Discard = false;
   for (const MachineOperand &Op : MI.debug_operands()) {
-    if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
+    if (Op.isReg() && Op.getReg().isVirtual()) {
       const Register Reg = Op.getReg();
       if (!LIS->hasInterval(Reg)) {
         // The DBG_VALUE is described by a virtual register that does not have a
@@ -873,12 +874,16 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
 
 MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI,
                                                       SlotIndex Idx) {
-  assert(MI.isDebugValue() || MI.isDebugRef() || MI.isDebugPHI());
+  assert(MI.isDebugValueLike() || MI.isDebugPHI());
 
   // In instruction referencing mode, there should be no DBG_VALUE instructions
   // that refer to virtual registers. They might still refer to constants.
-  if (MI.isDebugValue())
-    assert(!MI.getOperand(0).isReg() || !MI.getOperand(0).getReg().isVirtual());
+  if (MI.isDebugValueLike())
+    assert(none_of(MI.debug_operands(),
+                   [](const MachineOperand &MO) {
+                     return MO.isReg() && MO.getReg().isVirtual();
+                   }) &&
+           "MIs should not refer to Virtual Registers in InstrRef mode.");
 
   // Unlink the instruction, store it in the debug instructions collection.
   auto NextInst = std::next(MI.getIterator());
@@ -955,7 +960,7 @@ void UserValue::extendDef(
     SlotIndex Idx, DbgVariableValue DbgValue,
     SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
         &LiveIntervalInfo,
-    Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+    std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
     LiveIntervals &LIS) {
   SlotIndex Start = Idx;
   MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
@@ -985,7 +990,7 @@ void UserValue::extendDef(
       Start = Start.getNextSlot();
       if (I.value() != DbgValue || I.stop() != Start) {
         // Clear `Kills`, as we have a new def available.
-        Kills = None;
+        Kills = std::nullopt;
         return;
       }
       // This is a one-slot placeholder. Just skip it.
@@ -996,7 +1001,7 @@ void UserValue::extendDef(
   if (I.valid() && I.start() < Stop) {
     Stop = I.start();
     // Clear `Kills`, as we have a new def available.
-    Kills = None;
+    Kills = std::nullopt;
   }
 
   if (Start < Stop) {
@@ -1012,9 +1017,8 @@ void UserValue::addDefsFromCopies(
     SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
     MachineRegisterInfo &MRI, LiveIntervals &LIS) {
   // Don't track copies from physregs, there are too many uses.
-  if (any_of(LocIntervals, [](auto LocI) {
-        return !Register::isVirtualRegister(LocI.second->reg());
-      }))
+  if (any_of(LocIntervals,
+             [](auto LocI) { return !LocI.second->reg().isVirtual(); }))
     return;
 
   // Collect all the (vreg, valno) pairs that are copies of LI.
@@ -1035,7 +1039,7 @@ void UserValue::addDefsFromCopies(
       // arguments, and the argument registers are always call clobbered. We are
       // better off in the source register which could be a callee-saved
       // register, or it could be spilled.
-      if (!Register::isVirtualRegister(DstReg))
+      if (!DstReg.isVirtual())
        continue;
 
       // Is the value extended to reach this copy? If not, another def may be
@@ -1114,7 +1118,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
     bool ShouldExtendDef = false;
     for (unsigned LocNo : DbgValue.loc_nos()) {
       const MachineOperand &LocMO = locations[LocNo];
-      if (!LocMO.isReg() || !Register::isVirtualRegister(LocMO.getReg())) {
+      if (!LocMO.isReg() || !LocMO.getReg().isVirtual()) {
         ShouldExtendDef |= !LocMO.isReg();
         continue;
       }
@@ -1129,7 +1133,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
       LIs[LocNo] = {LI, VNI};
     }
     if (ShouldExtendDef) {
-      Optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills;
+      std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills;
       extendDef(Idx, DbgValue, LIs, Kills, LIS);
 
       if (Kills) {
@@ -1522,8 +1526,7 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
     unsigned SpillOffset = 0;
     MachineOperand Loc = locations[I];
     // Only virtual registers are rewritten.
-    if (Loc.isReg() && Loc.getReg() &&
-        Register::isVirtualRegister(Loc.getReg())) {
+    if (Loc.isReg() && Loc.getReg() && Loc.getReg().isVirtual()) {
       Register VirtReg = Loc.getReg();
       if (VRM.isAssignedReg(VirtReg) &&
           Register::isPhysicalRegister(VRM.getPhys(VirtReg))) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 9378aaeb181c..7cd3d26cf5b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -963,7 +963,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
                                          LaneBitmask LaneMask,
                                          const MachineRegisterInfo &MRI,
                                          const SlotIndexes &Indexes) const {
-  assert(Register::isVirtualRegister(reg()));
+  assert(reg().isVirtual());
   LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg());
   assert((VRegMask & LaneMask).any());
   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 3176d73b35f6..ccc5ae98086e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -51,7 +51,7 @@ void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
   // Visit all def operands. If the same instruction has multiple defs of Reg,
   // createDeadDef() will deduplicate.
   const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
-  unsigned Reg = LI.reg();
+  Register Reg = LI.reg();
   for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
     if (!MO.isDef() && !MO.readsReg())
       continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 8a76048bb8c4..a49f6b0604c5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -180,7 +180,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
 #endif
 
 LiveInterval *LiveIntervals::createInterval(Register reg) {
-  float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F;
+  float Weight = reg.isPhysical() ? huge_valf : 0.0F;
   return new LiveInterval(reg, Weight);
 }
 
@@ -449,8 +449,7 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
 bool LiveIntervals::shrinkToUses(LiveInterval *li,
                                  SmallVectorImpl<MachineInstr*> *dead) {
   LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
-  assert(Register::isVirtualRegister(li->reg()) &&
-         "Can only shrink virtual registers");
+  assert(li->reg().isVirtual() && "Can only shrink virtual registers");
 
   // Shrink subregister live ranges.
   bool NeedsCleanup = false;
@@ -508,7 +507,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
 bool LiveIntervals::computeDeadValues(LiveInterval &LI,
                                       SmallVectorImpl<MachineInstr*> *dead) {
   bool MayHaveSplitComponents = false;
-  bool HaveDeadDef = false;
 
   for (VNInfo *VNI : LI.valnos) {
     if (VNI->isUnused())
@@ -534,29 +532,25 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
       VNI->markUnused();
       LI.removeSegment(I);
       LLVM_DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
-      MayHaveSplitComponents = true;
     } else {
       // This is a dead def. Make sure the instruction knows.
       MachineInstr *MI = getInstructionFromIndex(Def);
       assert(MI && "No instruction defining live value");
       MI->addRegisterDead(LI.reg(), TRI);
-      if (HaveDeadDef)
-        MayHaveSplitComponents = true;
-      HaveDeadDef = true;
 
       if (dead && MI->allDefsAreDead()) {
         LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
         dead->push_back(MI);
       }
     }
+    MayHaveSplitComponents = true;
   }
   return MayHaveSplitComponents;
 }
 
 void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
   LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
-  assert(Register::isVirtualRegister(Reg) &&
-         "Can only shrink virtual registers");
+  assert(Reg.isVirtual() && "Can only shrink virtual registers");
 
   // Find all the values used, including PHI kills.
   ShrinkToUsesWorkList WorkList;
@@ -1025,7 +1019,7 @@ public:
       Register Reg = MO.getReg();
       if (!Reg)
         continue;
-      if (Register::isVirtualRegister(Reg)) {
+      if (Reg.isVirtual()) {
         LiveInterval &LI = LIS.getInterval(Reg);
         if (LI.hasSubRanges()) {
           unsigned SubReg = MO.getSubReg();
@@ -1079,7 +1073,7 @@ private:
       return;
     LLVM_DEBUG({
       dbgs() << " ";
-      if (Register::isVirtualRegister(Reg)) {
+      if (Reg.isVirtual()) {
         dbgs() << printReg(Reg);
         if (LaneMask.any())
           dbgs() << " L" << PrintLaneMask(LaneMask);
@@ -1455,7 +1449,7 @@ private:
   // Return the last use of reg between NewIdx and OldIdx.
   SlotIndex findLastUseBefore(SlotIndex Before, Register Reg,
                               LaneBitmask LaneMask) {
-    if (Register::isVirtualRegister(Reg)) {
+    if (Reg.isVirtual()) {
       SlotIndex LastUse = Before;
       for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
         if (MO.isUndef())
@@ -1499,8 +1493,7 @@ private:
       // Check if MII uses Reg.
       for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
-        if (MO->isReg() && !MO->isUndef() &&
-            Register::isPhysicalRegister(MO->getReg()) &&
+        if (MO->isReg() && !MO->isUndef() && MO->getReg().isPhysical() &&
             TRI.hasRegUnit(MO->getReg(), Reg))
           return Idx.getRegSlot();
     }
@@ -1747,9 +1740,8 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
     return;
   LLVM_DEBUG(dbgs() << "  Split " << NumComp << " components: " << LI << '\n');
   Register Reg = LI.reg();
-  const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
   for (unsigned I = 1; I < NumComp; ++I) {
-    Register NewVReg = MRI->createVirtualRegister(RegClass);
+    Register NewVReg = MRI->cloneVirtualRegister(Reg);
     LiveInterval &NewLI = createEmptyInterval(NewVReg);
     SplitLIs.push_back(&NewLI);
   }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index abf36b3f4c67..d8b024fbdfea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -24,15 +24,16 @@ using namespace llvm;
 
 #define DEBUG_TYPE "regalloc"
 
-STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
-STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
-STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+STATISTIC(NumDCEDeleted,        "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads,    "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges,        "Number of live ranges fractured by DCE");
+STATISTIC(NumReMaterialization, "Number of instructions rematerialized");
 
 void LiveRangeEdit::Delegate::anchor() { }
 
 LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
                                                      bool createSubRanges) {
-  Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+  Register VReg = MRI.cloneVirtualRegister(OldReg);
   if (VRM)
     VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
 
@@ -52,7 +53,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
 }
 
 Register LiveRangeEdit::createFrom(Register OldReg) {
-  Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+  Register VReg = MRI.cloneVirtualRegister(OldReg);
   if (VRM) {
     VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
   }
@@ -113,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
 
     // We can't remat physreg uses, unless it is a constant or target wants
     // to ignore this use.
-    if (Register::isPhysicalRegister(MO.getReg())) {
+    if (MO.getReg().isPhysical()) {
       if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO))
         continue;
       return false;
@@ -134,9 +135,11 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
       return false;
 
     // Check that subrange is live at UseIdx.
-    if (MO.getSubReg()) {
+    if (li.hasSubRanges()) {
       const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
-      LaneBitmask LM = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+      unsigned SubReg = MO.getSubReg();
+      LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+                              : MRI.getMaxLaneMaskForVReg(MO.getReg());
       for (LiveInterval::SubRange &SR : li.subranges()) {
         if ((SR.LaneMask & LM).none())
           continue;
@@ -181,14 +184,20 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
                                          unsigned DestReg,
                                          const Remat &RM,
                                          const TargetRegisterInfo &tri,
-                                         bool Late) {
+                                         bool Late,
+                                         unsigned SubIdx,
+                                         MachineInstr *ReplaceIndexMI) {
   assert(RM.OrigMI && "Invalid remat");
-  TII.reMaterialize(MBB, MI, DestReg, 0, *RM.OrigMI, tri);
+  TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri);
   // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
   // to false anyway in case the isDead flag of RM.OrigMI's dest register
   // is true.
   (*--MI).getOperand(0).setIsDead(false);
   Rematted.insert(RM.ParentVNI);
+  ++NumReMaterialization;
+
+  if (ReplaceIndexMI)
+    return LIS.ReplaceMachineInstrInMaps(*ReplaceIndexMI, *MI).getRegSlot();
   return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
 }
 
@@ -309,7 +318,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
       MI->getDesc().getNumDefs() == 1) {
     Dest = MI->getOperand(0).getReg();
     DestSubReg = MI->getOperand(0).getSubReg();
-    unsigned Original = VRM->getOriginal(Dest);
+    Register Original = VRM->getOriginal(Dest);
     LiveInterval &OrigLI = LIS.getInterval(Original);
     VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
     // The original live-range may have been shrunk to
@@ -327,7 +336,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
     if (!MO.isReg())
       continue;
     Register Reg = MO.getReg();
-    if (!Register::isVirtualRegister(Reg)) {
+    if (!Reg.isVirtual()) {
       // Check if MI reads any unreserved physregs.
       if (Reg && MO.readsReg() && !MRI.isReserved(Reg))
         ReadsPhysRegs = true;
@@ -369,7 +378,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
     // Remove all operands that aren't physregs.
     for (unsigned i = MI->getNumOperands(); i; --i) {
       const MachineOperand &MO = MI->getOperand(i-1);
-      if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
+      if (MO.isReg() && MO.getReg().isPhysical())
         continue;
       MI->removeOperand(i-1);
     }
@@ -439,7 +448,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
     LiveInterval *LI = ToShrink.pop_back_val();
     if (foldAsLoad(LI, Dead))
       continue;
-    unsigned VReg = LI->reg();
+    Register VReg = LI->reg();
     if (TheDelegate)
       TheDelegate->LRE_WillShrinkVirtReg(VReg);
     if (!LIS.shrinkToUses(LI, &Dead))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 8e56985246db..93f5314539cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -176,7 +176,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
       Register Reg = MO.getReg();
       // Do not move the instruction if it def/uses a physical register,
       // unless it is a constant physical register or a noreg.
-      if (!Register::isVirtualRegister(Reg)) {
+      if (!Reg.isVirtual()) {
         if (!Reg || MRI.isConstantPhysReg(Reg))
           continue;
         Insert = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
index d8d8bd5d61a2..34de09dd2944 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -22,8 +22,10 @@ using namespace llvm;
 void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
   for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
     for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
-      if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+      if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) {
         Units.reset(U);
+        break;
+      }
     }
   }
 }
@@ -31,42 +33,54 @@ void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
 void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
   for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
     for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
-      if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+      if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) {
         Units.set(U);
+        break;
+      }
     }
   }
 }
 
 void LiveRegUnits::stepBackward(const MachineInstr &MI) {
   // Remove defined registers and regmask kills from the set.
-  for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+  for (const MachineOperand &MOP : MI.operands()) {
+    if (MOP.isReg()) {
+      if (MOP.isDef() && MOP.getReg().isPhysical())
+        removeReg(MOP.getReg());
+      continue;
+    }
+
     if (MOP.isRegMask()) {
       removeRegsNotPreserved(MOP.getRegMask());
       continue;
     }
-
-    if (MOP.isDef())
-      removeReg(MOP.getReg());
   }
 
   // Add uses to the set.
-  for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+  for (const MachineOperand &MOP : MI.operands()) {
     if (!MOP.isReg() || !MOP.readsReg())
       continue;
-    addReg(MOP.getReg());
+
+    if (MOP.getReg().isPhysical())
+      addReg(MOP.getReg());
   }
 }
 
 void LiveRegUnits::accumulate(const MachineInstr &MI) {
   // Add defs, uses and regmask clobbers to the set.
-  for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+  for (const MachineOperand &MOP : MI.operands()) {
+    if (MOP.isReg()) {
+      if (!MOP.getReg().isPhysical())
+        continue;
+      if (MOP.isDef() || MOP.readsReg())
+        addReg(MOP.getReg());
+      continue;
+    }
+
     if (MOP.isRegMask()) {
       addRegsInMask(MOP.getRegMask());
       continue;
     }
-    if (!MOP.isDef() && !MOP.readsReg())
-      continue;
-    addReg(MOP.getReg());
   }
 }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 40250171fe1e..34c81c92707e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -518,7 +518,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
       continue;
     Register MOReg = MO.getReg();
     if (MO.isUse()) {
-      if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg)))
+      if (!(MOReg.isPhysical() && MRI->isReserved(MOReg)))
         MO.setIsKill(false);
      if (MO.readsReg())
        UseRegs.push_back(MOReg);
@@ -526,7 +526,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
      assert(MO.isDef());
      // FIXME: We should not remove any dead flags. However the MIPS RDDSP
      // instruction needs it at the moment: http://llvm.org/PR27116.
-      if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg))
+      if (MOReg.isPhysical() && !MRI->isReserved(MOReg))
         MO.setIsDead(false);
       DefRegs.push_back(MOReg);
     }
@@ -762,7 +762,7 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
     if (MO.isReg() && MO.isKill()) {
       MO.setIsKill(false);
       Register Reg = MO.getReg();
-      if (Register::isVirtualRegister(Reg)) {
+      if (Reg.isVirtual()) {
         bool removed = getVarInfo(Reg).removeKill(MI);
         assert(removed && "kill not in register's VarInfo?");
         (void)removed;
@@ -850,7 +850,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
   // Record all vreg defs and kills of all instructions in SuccBB.
   for (; BBI != BBE; ++BBI) {
     for (const MachineOperand &Op : BBI->operands()) {
-      if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
+      if (Op.isReg() && Op.getReg().isVirtual()) {
         if (Op.isDef())
           Defs.insert(Op.getReg());
         else if (Op.isKill())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 5f54d7cc8472..e491ed12034d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -288,7 +288,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
   // stack frame. If it wants one, re-use a suitable one we've previously
   // allocated, or if there isn't one that fits the bill, allocate a new one
   // and ask the target to create a defining instruction for it.
-  bool UsedBaseReg = false;
 
   MachineFrameInfo &MFI = Fn.getFrameInfo();
   const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
@@ -386,7 +385,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
     // instruction itself will be taken into account by the target,
     // so we don't have to adjust for it here when reusing a base
     // register.
-    if (UsedBaseReg &&
+    if (BaseReg.isValid() &&
         lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,
                                LocalOffset, MI, TRI)) {
       LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
@@ -396,8 +395,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
       // No previously defined register was in range, so create a new one.
       int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, idx);
 
-      int64_t PrevBaseOffset = BaseOffset;
-      BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+      int64_t CandBaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
 
       // We'd like to avoid creating single-use virtual base registers.
       // Because the FrameRefs are in sorted order, and we've already
@@ -406,12 +404,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
       // then don't bother creating it.
       if (ref + 1 >= e ||
           !lookupCandidateBaseReg(
-              BaseReg, BaseOffset, FrameSizeAdjust,
+              BaseReg, CandBaseOffset, FrameSizeAdjust,
               FrameReferenceInsns[ref + 1].getLocalOffset(),
-              *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
-        BaseOffset = PrevBaseOffset;
+              *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI))
         continue;
-      }
+
+      // Save the base offset.
+      BaseOffset = CandBaseOffset;
 
       // Tell the target to insert the instruction to initialize
       // the base register.
@@ -428,7 +427,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
         Offset = -InstrOffset;
 
       ++NumBaseRegisters;
-      UsedBaseReg = true;
     }
     assert(BaseReg && "Unable to allocate virtual base register!");
 
@@ -440,5 +438,5 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
     ++NumReplacements;
   }
 
-  return UsedBaseReg;
+  return BaseReg.isValid();
 }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
index efebb18c9908..5b388be27839 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
+#include <optional>
 
 using namespace llvm;
 
@@ -31,7 +31,7 @@ void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
   MergedBBFreq[MBB] = F;
 }
 
-Optional<uint64_t>
+std::optional<uint64_t>
 MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const {
   auto I = MergedBBFreq.find(MBB);
 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 3e7b4dbc9d71..21b849244d9b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -155,7 +155,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
       if (!MO.isReg())
         continue;
 
-      if (Register::isVirtualRegister(MO.getReg()))
+      if (MO.getReg().isVirtual())
         continue;
 
       if (!MO.isDef())
@@ -172,7 +172,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
         continue;
 
       MachineOperand &MO = II->getOperand(0);
-      if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+      if (!MO.isReg() || !MO.getReg().isVirtual())
         continue;
       if (!MO.isDef())
         continue;
@@ -185,7 +185,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
       }
 
       if (II->getOperand(i).isReg()) {
-        if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
+        if (!II->getOperand(i).getReg().isVirtual())
           if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
             continue;
           }
@@ -307,9 +307,9 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
     const Register Dst = MI->getOperand(0).getReg();
     const Register Src = MI->getOperand(1).getReg();
 
-    if (!Register::isVirtualRegister(Dst))
+    if (!Dst.isVirtual())
       continue;
-    if (!Register::isVirtualRegister(Src))
+    if (!Src.isVirtual())
       continue;
     // Not folding COPY instructions if regbankselect has not set the RCs.
    // Why are we only considering Register Classes? Because the verifier
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index 3152102410d7..ad8a17f25ec5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -70,7 +70,7 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
 bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
   if (!EnableFSDiscriminator)
     return false;
-  if (!MF.getFunction().isDebugInfoForProfiling())
+  if (!MF.getFunction().shouldEmitDebugInfoForProfiling())
     return false;
 
   bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index b0daa20913f5..c136b08223b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MILexer.h"
-#include "llvm/ADT/None.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
@@ -33,7 +32,7 @@ class Cursor {
   const char *End = nullptr;
 
 public:
-  Cursor(NoneType) {}
+  Cursor(std::nullopt_t) {}
 
   explicit Cursor(StringRef Str) {
     Ptr = Str.data();
@@ -159,7 +158,7 @@ static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
       ErrorCallback(
           C.location(),
          "end of machine instruction reached before the closing '\"'");
-      return None;
+      return std::nullopt;
     }
   }
   C.advance();
@@ -217,6 +216,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("nofpexcept", MIToken::kw_nofpexcept)
       .Case("debug-location", MIToken::kw_debug_location)
       .Case("debug-instr-number", MIToken::kw_debug_instr_number)
+      .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
      .Case("same_value", MIToken::kw_cfi_same_value)
      .Case("offset", MIToken::kw_cfi_offset)
      .Case("rel_offset", MIToken::kw_cfi_rel_offset)
@@ -258,7 +258,6 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
      .Case("call-entry", MIToken::kw_call_entry)
      .Case("custom", MIToken::kw_custom)
      .Case("liveout", MIToken::kw_liveout)
-      .Case("address-taken", MIToken::kw_address_taken)
      .Case("landing-pad", MIToken::kw_landing_pad)
      .Case("inlineasm-br-indirect-target",
            MIToken::kw_inlineasm_br_indirect_target)
@@ -271,16 +270,22 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
      .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
      .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
      .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
+      .Case("pcsections", MIToken::kw_pcsections)
+      .Case("cfi-type", MIToken::kw_cfi_type)
      .Case("bbsections", MIToken::kw_bbsections)
+      .Case("bb_id", MIToken::kw_bb_id)
      .Case("unknown-size", MIToken::kw_unknown_size)
      .Case("unknown-address", MIToken::kw_unknown_address)
      .Case("distinct", MIToken::kw_distinct)
+      .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken)
+      .Case("machine-block-address-taken",
+            MIToken::kw_machine_block_address_taken)
      .Default(MIToken::Identifier);
 }
 
 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
   if (!isalpha(C.peek()) && C.peek() != '_')
-    return None;
+    return std::nullopt;
   auto Range = C;
   while (isIdentifierChar(C.peek()))
     C.advance();
@@ -294,7 +299,7 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
                                         ErrorCallbackType ErrorCallback) {
   bool IsReference = C.remaining().startswith("%bb.");
   if (!IsReference && !C.remaining().startswith("bb."))
-    return None;
+    return std::nullopt;
   auto Range = C;
   unsigned PrefixLength = IsReference ? 4 : 3;
   C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
@@ -328,7 +333,7 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
                             MIToken::TokenKind Kind) {
   if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance(Rule.size());
   auto NumberRange = C;
@@ -341,7 +346,7 @@ static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
                                    MIToken::TokenKind Kind) {
   if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance(Rule.size());
   auto NumberRange = C;
@@ -381,7 +386,7 @@ static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
                                        ErrorCallbackType ErrorCallback) {
   const StringRef Rule = "%subreg.";
   if (!C.remaining().startswith(Rule))
-    return None;
+    return std::nullopt;
   return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
                  ErrorCallback);
 }
@@ -390,7 +395,7 @@ static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
                               ErrorCallbackType ErrorCallback) {
   const StringRef Rule = "%ir-block.";
   if (!C.remaining().startswith(Rule))
-    return None;
+    return std::nullopt;
   if (isdigit(C.peek(Rule.size())))
     return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
   return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
@@ -400,7 +405,7 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
                               ErrorCallbackType ErrorCallback) {
   const StringRef Rule = "%ir.";
   if (!C.remaining().startswith(Rule))
-    return None;
+    return std::nullopt;
   if (isdigit(C.peek(Rule.size())))
     return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
   return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
@@ -409,7 +414,7 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
 static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
                                      ErrorCallbackType ErrorCallback) {
   if (C.peek() != '"')
-    return None;
+    return std::nullopt;
   return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
                  ErrorCallback);
 }
@@ -443,7 +448,7 @@ static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
 static Cursor maybeLexRegister(Cursor C, MIToken &Token,
                                ErrorCallbackType ErrorCallback) {
   if (C.peek() != '%' && C.peek() != '$')
-    return None;
+    return std::nullopt;
 
   if (C.peek() == '%') {
     if (isdigit(C.peek(1)))
@@ -452,7 +457,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token,
     if (isRegisterChar(C.peek(1)))
       return lexNamedVirtualRegister(C, Token);
 
-    return None;
+    return std::nullopt;
   }
 
   assert(C.peek() == '$');
@@ -468,7 +473,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
                                   ErrorCallbackType ErrorCallback) {
   if (C.peek() != '@')
-    return None;
+    return std::nullopt;
   if (!isdigit(C.peek(1)))
     return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
                    ErrorCallback);
@@ -485,7 +490,7 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
                                      ErrorCallbackType ErrorCallback) {
   if (C.peek() != '&')
-    return None;
+    return std::nullopt;
   return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
                  ErrorCallback);
 }
 
@@ -494,7 +499,7 @@ static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
                                ErrorCallbackType ErrorCallback) {
   const StringRef Rule = "<mcsymbol ";
   if (!C.remaining().startswith(Rule))
-    return None;
+    return std::nullopt;
   auto Start = C;
   C.advance(Rule.size());
 
@@ -559,7 +564,7 @@ static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
 
 static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
   if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
-    return None;
+    return std::nullopt;
   Cursor Range = C;
   C.advance(2);
   unsigned PrefLen = 2;
@@ -571,7 +576,7 @@ static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
     C.advance();
   StringRef StrVal = Range.upto(C);
   if (StrVal.size() <= PrefLen)
-    return None;
+    return std::nullopt;
   if (PrefLen == 2)
     Token.reset(MIToken::HexLiteral, Range.upto(C));
   else // It must be 3, which means that there was a floating-point prefix.
@@ -581,7 +586,7 @@ static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
 
 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
   if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance();
   while (isdigit(C.peek()))
@@ -607,7 +612,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
 static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
                               ErrorCallbackType ErrorCallback) {
   if (C.peek() != '!')
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance(1);
   if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
@@ -664,7 +669,7 @@ static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
   } else
     Kind = symbolToken(C.peek());
   if (Kind == MIToken::Error)
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance(Length);
   Token.reset(Kind, Range.upto(C));
@@ -673,7 +678,7 @@ static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
 
 static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
   if (!isNewlineChar(C.peek()))
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance();
   Token.reset(MIToken::Newline, Range.upto(C));
@@ -683,7 +688,7 @@ static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
                                      ErrorCallbackType ErrorCallback) {
   if (C.peek() != '`')
-    return None;
+    return std::nullopt;
   auto Range = C;
   C.advance();
   auto StrRange = C;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index 70d17f819ce3..ac484cdfd6c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -75,6 +75,7 @@ struct MIToken {
     kw_nofpexcept,
     kw_debug_location,
     kw_debug_instr_number,
+    kw_dbg_instr_ref,
     kw_cfi_same_value,
     kw_cfi_offset,
     kw_cfi_rel_offset,
@@ -114,7 +115,6 @@ struct MIToken {
     kw_call_entry,
     kw_custom,
     kw_liveout,
-    kw_address_taken,
     kw_landing_pad,
     kw_inlineasm_br_indirect_target,
     kw_ehfunclet_entry,
@@ -126,9 +126,14 @@ struct MIToken {
     kw_pre_instr_symbol,
     kw_post_instr_symbol,
     kw_heap_alloc_marker,
+    kw_pcsections,
+    kw_cfi_type,
     kw_bbsections,
+    kw_bb_id,
     kw_unknown_size,
     kw_unknown_address,
+    kw_ir_block_address_taken,
+    kw_machine_block_address_taken,
 
     // Metadata types.
     kw_distinct,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index e3d6b59c5077..525f49347fc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -16,8 +16,6 @@
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -377,10 +375,11 @@ struct ParsedMachineOperand {
   MachineOperand Operand;
   StringRef::iterator Begin;
   StringRef::iterator End;
-  Optional<unsigned> TiedDefIdx;
+  std::optional<unsigned> TiedDefIdx;
 
   ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin,
-                       StringRef::iterator End, Optional<unsigned> &TiedDefIdx)
+                       StringRef::iterator End,
+                       std::optional<unsigned> &TiedDefIdx)
       : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) {
     if (TiedDefIdx)
       assert(Operand.isReg() && Operand.isUse() &&
@@ -449,7 +448,8 @@ public:
   bool parseSubRegisterIndex(unsigned &SubReg);
   bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
   bool parseRegisterOperand(MachineOperand &Dest,
-                            Optional<unsigned> &TiedDefIdx, bool IsDef = false);
+                            std::optional<unsigned> &TiedDefIdx,
+                            bool IsDef = false);
   bool parseImmediateOperand(MachineOperand &Dest);
   bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
                        const Constant *&C);
@@ -485,19 +485,22 @@ public:
   bool parsePredicateOperand(MachineOperand &Dest);
   bool parseShuffleMaskOperand(MachineOperand &Dest);
   bool parseTargetIndexOperand(MachineOperand &Dest);
+  bool parseDbgInstrRefOperand(MachineOperand &Dest);
   bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
   bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
   bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
                            MachineOperand &Dest,
-                           Optional<unsigned> &TiedDefIdx);
+                           std::optional<unsigned> &TiedDefIdx);
   bool parseMachineOperandAndTargetFlags(const unsigned OpCode,
                                          const unsigned OpIdx,
                                          MachineOperand &Dest,
-                                         Optional<unsigned> &TiedDefIdx);
+                                         std::optional<unsigned> &TiedDefIdx);
   bool parseOffset(int64_t &Offset);
+  bool parseIRBlockAddressTaken(BasicBlock *&BB);
   bool parseAlignment(uint64_t &Alignment);
   bool parseAddrspace(unsigned &Addrspace);
-  bool parseSectionID(Optional<MBBSectionID> &SID);
+  bool parseSectionID(std::optional<MBBSectionID> &SID);
+  bool parseBBID(std::optional<unsigned> &BBID);
   bool parseOperandsOffset(MachineOperand &Op);
   bool parseIRValue(const Value *&V);
   bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
@@ -508,6 +511,7 @@ public:
   bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
   bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
   bool parseHeapAllocMarker(MDNode *&Node);
+  bool parsePCSections(MDNode *&Node);
 
   bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
                               MachineOperand &Dest, const MIRFormatter &MF);
@@ -593,7 +597,7 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
   // Create a diagnostic for a YAML string literal.
   Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
                        Loc - Source.data(), SourceMgr::DK_Error, Msg.str(),
-                       Source, None, None);
+                       Source, std::nullopt, std::nullopt);
   return true;
 }
 
@@ -639,7 +643,7 @@ bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
 }
 
 // Parse Machine Basic Block Section ID.
-bool MIParser::parseSectionID(Optional<MBBSectionID> &SID) {
+bool MIParser::parseSectionID(std::optional<MBBSectionID> &SID) {
   assert(Token.is(MIToken::kw_bbsections));
   lex();
   if (Token.is(MIToken::IntegerLiteral)) {
@@ -660,6 +664,18 @@ bool MIParser::parseSectionID(Optional<MBBSectionID> &SID) {
   return false;
 }
 
+// Parse Machine Basic Block ID.
+bool MIParser::parseBBID(std::optional<unsigned> &BBID) {
+  assert(Token.is(MIToken::kw_bb_id));
+  lex();
+  unsigned Value = 0;
+  if (getUnsigned(Value))
+    return error("Unknown BB ID");
+  BBID = Value;
+  lex();
+  return false;
+}
+
 bool MIParser::parseBasicBlockDefinition(
     DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
   assert(Token.is(MIToken::MachineBasicBlockLabel));
@@ -669,21 +685,27 @@ bool MIParser::parseBasicBlockDefinition(
   auto Loc = Token.location();
   auto Name = Token.stringValue();
   lex();
-  bool HasAddressTaken = false;
+  bool MachineBlockAddressTaken = false;
+  BasicBlock *AddressTakenIRBlock = nullptr;
   bool IsLandingPad = false;
   bool IsInlineAsmBrIndirectTarget = false;
   bool IsEHFuncletEntry = false;
-  Optional<MBBSectionID> SectionID;
+  std::optional<MBBSectionID> SectionID;
   uint64_t Alignment = 0;
+  std::optional<unsigned> BBID;
   BasicBlock *BB = nullptr;
   if (consumeIfPresent(MIToken::lparen)) {
     do {
       // TODO: Report an error when multiple same attributes are specified.
       switch (Token.kind()) {
-      case MIToken::kw_address_taken:
-        HasAddressTaken = true;
+      case MIToken::kw_machine_block_address_taken:
+        MachineBlockAddressTaken = true;
         lex();
         break;
+      case MIToken::kw_ir_block_address_taken:
+        if (parseIRBlockAddressTaken(AddressTakenIRBlock))
+          return true;
+        break;
       case MIToken::kw_landing_pad:
         IsLandingPad = true;
         lex();
        break;
@@ -701,6 +723,7 @@ bool MIParser::parseBasicBlockDefinition(
           return true;
         break;
       case MIToken::IRBlock:
+      case MIToken::NamedIRBlock:
         // TODO: Report an error when both name and ir block are specified.
         if (parseIRBlock(BB, MF.getFunction()))
           return true;
@@ -710,6 +733,10 @@ bool MIParser::parseBasicBlockDefinition(
         if (parseSectionID(SectionID))
           return true;
         break;
+      case MIToken::kw_bb_id:
+        if (parseBBID(BBID))
+          return true;
+        break;
       default:
         break;
       }
@@ -736,15 +763,24 @@ bool MIParser::parseBasicBlockDefinition(
                  Twine(ID));
   if (Alignment)
     MBB->setAlignment(Align(Alignment));
-  if (HasAddressTaken)
-    MBB->setHasAddressTaken();
+  if (MachineBlockAddressTaken)
+    MBB->setMachineBlockAddressTaken();
+  if (AddressTakenIRBlock)
+    MBB->setAddressTakenIRBlock(AddressTakenIRBlock);
   MBB->setIsEHPad(IsLandingPad);
   MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
   MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
   if (SectionID) {
-    MBB->setSectionID(SectionID.value());
+    MBB->setSectionID(*SectionID);
     MF.setBBSectionsType(BasicBlockSection::List);
   }
+  if (BBID.has_value()) {
+    // BBSectionsType is set to `List` if any basic blocks has `SectionID`.
+    // Here, we set it to `Labels` if it hasn't been set above.
+    if (!MF.hasBBSections())
+      MF.setBBSectionsType(BasicBlockSection::Labels);
+    MBB->setBBID(BBID.value());
+  }
   return false;
 }
 
@@ -987,7 +1023,7 @@ bool MIParser::parse(MachineInstr *&MI) {
   SmallVector<ParsedMachineOperand, 8> Operands;
   while (Token.isRegister() || Token.isRegisterFlag()) {
     auto Loc = Token.location();
-    Optional<unsigned> TiedDefIdx;
+    std::optional<unsigned> TiedDefIdx;
     if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
       return true;
     Operands.push_back(
@@ -1007,11 +1043,13 @@ bool MIParser::parse(MachineInstr *&MI) {
   while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
          Token.isNot(MIToken::kw_post_instr_symbol) &&
          Token.isNot(MIToken::kw_heap_alloc_marker) &&
+         Token.isNot(MIToken::kw_pcsections) &&
+         Token.isNot(MIToken::kw_cfi_type) &&
          Token.isNot(MIToken::kw_debug_location) &&
          Token.isNot(MIToken::kw_debug_instr_number) &&
          Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
     auto Loc = Token.location();
-    Optional<unsigned> TiedDefIdx;
+    std::optional<unsigned> TiedDefIdx;
     if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO,
                                           TiedDefIdx))
       return true;
     Operands.push_back(
@@ -1036,6 +1074,24 @@ bool MIParser::parse(MachineInstr *&MI) {
   if (Token.is(MIToken::kw_heap_alloc_marker))
     if (parseHeapAllocMarker(HeapAllocMarker))
       return true;
 
+  MDNode *PCSections = nullptr;
+  if (Token.is(MIToken::kw_pcsections))
+    if (parsePCSections(PCSections))
+      return true;
+
+  unsigned CFIType = 0;
+  if (Token.is(MIToken::kw_cfi_type)) {
+    lex();
+    if (Token.isNot(MIToken::IntegerLiteral))
+      return error("expected an integer literal after 'cfi-type'");
+    // getUnsigned is sufficient for 32-bit integers.
+    if (getUnsigned(CFIType))
+      return true;
+    lex();
+    // Lex past trailing comma if present.
+    if (Token.is(MIToken::comma))
+      lex();
+  }
+
   unsigned InstrNum = 0;
   if (Token.is(MIToken::kw_debug_instr_number)) {
@@ -1116,6 +1172,10 @@ bool MIParser::parse(MachineInstr *&MI) {
     MI->setPostInstrSymbol(MF, PostInstrSymbol);
   if (HeapAllocMarker)
     MI->setHeapAllocMarker(MF, HeapAllocMarker);
+  if (PCSections)
+    MI->setPCSections(MF, PCSections);
+  if (CFIType)
+    MI->setCFIType(MF, CFIType);
   if (!MemOperands.empty())
     MI->setMemRefs(MF, MemOperands);
   if (InstrNum)
@@ -1322,7 +1382,7 @@ bool MIParser::parseMetadata(Metadata *&MD) {
     // Forward reference.
     auto &FwdRef = PFS.MachineForwardRefMDNodes[ID];
     FwdRef = std::make_pair(
-        MDTuple::getTemporary(MF.getFunction().getContext(), None), Loc);
+        MDTuple::getTemporary(MF.getFunction().getContext(), std::nullopt), Loc);
     PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get());
     MD = FwdRef.first.get();
 
@@ -1336,7 +1396,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
 
 static std::string getRegisterName(const TargetRegisterInfo *TRI,
                                    Register Reg) {
-  assert(Register::isPhysicalRegister(Reg) && "expected phys reg");
+  assert(Reg.isPhysical() && "expected phys reg");
   return StringRef(TRI->getName(Reg)).lower();
 }
 
@@ -1359,14 +1419,10 @@ bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
   // Gather all the expected implicit operands.
   SmallVector<MachineOperand, 4> ImplicitOperands;
-  if (MCID.ImplicitDefs)
-    for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
-      ImplicitOperands.push_back(
-          MachineOperand::CreateReg(*ImpDefs, true, true));
-  if (MCID.ImplicitUses)
-    for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
-      ImplicitOperands.push_back(
-          MachineOperand::CreateReg(*ImpUses, false, true));
+  for (MCPhysReg ImpDef : MCID.implicit_defs())
+    ImplicitOperands.push_back(MachineOperand::CreateReg(ImpDef, true, true));
+  for (MCPhysReg ImpUse : MCID.implicit_uses())
+    ImplicitOperands.push_back(MachineOperand::CreateReg(ImpUse, false, true));
 
   const auto *TRI = MF.getSubtarget().getRegisterInfo();
   assert(TRI && "Expected target register info");
@@ -1648,7 +1704,7 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
 }
 
 bool MIParser::parseRegisterOperand(MachineOperand &Dest,
-                                    Optional<unsigned> &TiedDefIdx,
+                                    std::optional<unsigned> &TiedDefIdx,
                                     bool IsDef) {
   unsigned Flags = IsDef ? RegState::Define : 0;
   while (Token.isRegisterFlag()) {
@@ -1666,11 +1722,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
   if (Token.is(MIToken::dot)) {
     if (parseSubRegisterIndex(SubReg))
       return true;
-    if (!Register::isVirtualRegister(Reg))
+    if (!Reg.isVirtual())
       return error("subregister index expects a virtual register");
   }
   if (Token.is(MIToken::colon)) {
-    if (!Register::isVirtualRegister(Reg))
+    if (!Reg.isVirtual())
       return error("register class specification expects a virtual register");
     lex();
     if (parseRegisterClassOrBank(*RegInfo))
@@ -1700,7 +1756,7 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
     }
   } else if (consumeIfPresent(MIToken::lparen)) {
     // Virtual registers may have a tpe with GlobalISel.
-    if (!Register::isVirtualRegister(Reg))
+    if (!Reg.isVirtual())
       return error("unexpected type on physical register");
 
     LLT Ty;
@@ -1715,7 +1771,7 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
     MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
     MRI.setType(Reg, Ty);
-  } else if (Register::isVirtualRegister(Reg)) {
+  } else if (Reg.isVirtual()) {
     // Generic virtual registers must have a type.
     // If we end up here this means the type hasn't been specified and
     // this is bad!
@@ -1744,9 +1800,12 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
 bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
   assert(Token.is(MIToken::IntegerLiteral));
   const APSInt &Int = Token.integerValue();
-  if (Int.getMinSignedBits() > 64)
+  if (auto SImm = Int.trySExtValue(); Int.isSigned() && SImm.has_value())
+    Dest = MachineOperand::CreateImm(*SImm);
+  else if (auto UImm = Int.tryZExtValue(); !Int.isSigned() && UImm.has_value())
+    Dest = MachineOperand::CreateImm(*UImm);
+  else
     return error("integer literal is too large to be an immediate operand");
-  Dest = MachineOperand::CreateImm(Int.getExtValue());
   lex();
   return false;
 }
@@ -1813,7 +1872,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
   return false;
 }
 
-// See LLT implemntation for bit size limits.
+// See LLT implementation for bit size limits.
static bool verifyScalarSize(uint64_t Size) { return Size != 0 && isUInt<16>(Size); } @@ -2681,6 +2740,37 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseDbgInstrRefOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_dbg_instr_ref)); + + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)"); + + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isNegative()) + return error("expected unsigned integer for instruction index"); + uint64_t InstrIdx = Token.integerValue().getZExtValue(); + assert(InstrIdx <= std::numeric_limits<unsigned>::max() && + "Instruction reference's instruction index is too large"); + lex(); + + if (expectAndConsume(MIToken::comma)) + return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)"); + + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isNegative()) + return error("expected unsigned integer for operand index"); + uint64_t OpIdx = Token.integerValue().getZExtValue(); + assert(OpIdx <= std::numeric_limits<unsigned>::max() && + "Instruction reference's operand index is too large"); + lex(); + + if (expectAndConsume(MIToken::rparen)) + return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)"); + + Dest = MachineOperand::CreateDbgInstrRef(InstrIdx, OpIdx); + return false; +} + bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_target_index)); lex(); @@ -2754,7 +2844,7 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, - Optional<unsigned> &TiedDefIdx) { + std::optional<unsigned> &TiedDefIdx) { switch (Token.kind()) { case MIToken::kw_implicit: case MIToken::kw_implicit_define: @@ -2832,6 +2922,8 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, return parsePredicateOperand(Dest); case MIToken::kw_shufflemask: return parseShuffleMaskOperand(Dest); + case MIToken::kw_dbg_instr_ref: + return parseDbgInstrRefOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: @@ -2848,7 +2940,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, if (const auto *Formatter = TII->getMIRFormatter()) { return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter); } - LLVM_FALLTHROUGH; + [[fallthrough]]; } default: // FIXME: Parse the MCSymbol machine operand. 
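The parseImmediateOperand hunk above replaces the single getMinSignedBits() > 64 check with two fit tests, so signed literals are accepted via sign extension while unsigned literals may use the full 64-bit range. A minimal standalone restatement of that accept/reject logic, assuming only the APSInt calls the hunk itself uses; immFromLiteral is an invented name:

// Sketch of the immediate range check: a signed literal must fit in a signed
// 64-bit value, an unsigned literal in an unsigned 64-bit value; anything
// wider is rejected (the parser reports "integer literal is too large").
#include "llvm/ADT/APSInt.h"
#include <cstdint>
#include <optional>

static std::optional<int64_t> immFromLiteral(const llvm::APSInt &Int) {
  if (auto SImm = Int.trySExtValue(); Int.isSigned() && SImm.has_value())
    return *SImm;                        // e.g. -1 or INT64_MIN
  if (auto UImm = Int.tryZExtValue(); !Int.isSigned() && UImm.has_value())
    return static_cast<int64_t>(*UImm);  // e.g. 0xffffffffffffffff
  return std::nullopt;                   // wider than 64 bits: parse error
}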
@@ -2859,7 +2951,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, bool MIParser::parseMachineOperandAndTargetFlags( const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, - Optional<unsigned> &TiedDefIdx) { + std::optional<unsigned> &TiedDefIdx) { unsigned TF = 0; bool HasTargetFlags = false; if (Token.is(MIToken::kw_target_flags)) { @@ -2918,6 +3010,19 @@ bool MIParser::parseOffset(int64_t &Offset) { return false; } +bool MIParser::parseIRBlockAddressTaken(BasicBlock *&BB) { + assert(Token.is(MIToken::kw_ir_block_address_taken)); + lex(); + if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)) + return error("expected basic block after 'ir_block_address_taken'"); + + if (parseIRBlock(BB, MF.getFunction())) + return true; + + lex(); + return false; +} + bool MIParser::parseAlignment(uint64_t &Alignment) { assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign)); lex(); @@ -3378,6 +3483,22 @@ bool MIParser::parseHeapAllocMarker(MDNode *&Node) { return false; } +bool MIParser::parsePCSections(MDNode *&Node) { + assert(Token.is(MIToken::kw_pcsections) && + "Invalid token for a PC sections!"); + lex(); + parseMDNode(Node); + if (!Node) + return error("expected a MDNode after 'pcsections'"); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + return false; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + return false; +} + static void initSlots2BasicBlocks( const Function &F, DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index aa9522bc3459..a20c2bfe6c0f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -234,7 +234,8 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) { // Create an empty module when the MIR file is empty. NoMIRDocuments = true; auto M = std::make_unique<Module>(Filename, Context); - if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) + if (auto LayoutOverride = + DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr())) M->setDataLayout(*LayoutOverride); return M; } @@ -257,7 +258,8 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) { } else { // Create an new, empty module. M = std::make_unique<Module>(Filename, Context); - if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) + if (auto LayoutOverride = + DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr())) M->setDataLayout(*LayoutOverride); NoLLVMIR = true; } @@ -441,6 +443,9 @@ void MIRParserImpl::setupDebugValueTracking( MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp}, {Sub.DstInst, Sub.DstOp}, Sub.Subreg); } + + // Flag for whether we're supposed to be using DBG_INSTR_REF. 
+ MF.setUseDebugInstrRef(YamlMF.UseDebugInstrRef); } bool @@ -659,9 +664,11 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + bool Error = false; // Create VRegs - auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) { + auto populateVRegInfo = [&](const VRegInfo &Info, Twine Name) { Register Reg = Info.VReg; switch (Info.Kind) { case VRegInfo::UNKNOWN: @@ -670,6 +677,14 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, Error = true; break; case VRegInfo::NORMAL: + if (!Info.D.RC->isAllocatable()) { + error(Twine("Cannot use non-allocatable class '") + + TRI->getRegClassName(Info.D.RC) + "' for virtual register " + + Name + " in function '" + MF.getName() + "'"); + Error = true; + break; + } + MRI.setRegClass(Reg, Info.D.RC); if (Info.PreferredReg != 0) MRI.setSimpleHint(Reg, Info.PreferredReg); @@ -695,7 +710,6 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, // Compute MachineRegisterInfo::UsedPhysRegMask for (const MachineBasicBlock &MBB : MF) { // Make sure MRI knows about registers clobbered by unwinder. - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (MBB.isEHPad()) if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF)) MRI.addPhysRegsUsedFromRegMask(RegMask); @@ -999,7 +1013,7 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, (HasQuote ? 1 : 0)); // TODO: Translate any source ranges as well. - return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None, + return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), std::nullopt, Error.getFixIts()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp index 25823b1567f7..0a4b28ac79a7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp @@ -200,6 +200,7 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.HasEHCatchret = MF.hasEHCatchret(); YamlMF.HasEHScopes = MF.hasEHScopes(); YamlMF.HasEHFunclets = MF.hasEHFunclets(); + YamlMF.UseDebugInstrRef = MF.useDebugInstrRef(); YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); @@ -306,13 +307,13 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, // Print the virtual register definitions. 
for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = Register::index2VirtReg(I); + Register Reg = Register::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; if (RegInfo.getVRegName(Reg) != "") continue; ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI); - unsigned PreferredReg = RegInfo.getSimpleHint(Reg); + Register PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) printRegMIR(PreferredReg, VReg.PreferredRegister, TRI); MF.VirtualRegisters.push_back(VReg); @@ -819,6 +820,19 @@ void MIPrinter::print(const MachineInstr &MI) { HeapAllocMarker->printAsOperand(OS, MST); NeedComma = true; } + if (MDNode *PCSections = MI.getPCSections()) { + if (NeedComma) + OS << ','; + OS << " pcsections "; + PCSections->printAsOperand(OS, MST); + NeedComma = true; + } + if (uint32_t CFIType = MI.getCFIType()) { + if (NeedComma) + OS << ','; + OS << " cfi-type " << CFIType; + NeedComma = true; + } if (auto Num = MI.peekDebugInstrNum()) { if (NeedComma) @@ -880,7 +894,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case MachineOperand::MO_Register: case MachineOperand::MO_CImmediate: case MachineOperand::MO_FPImmediate: @@ -897,6 +911,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_DbgInstrRef: case MachineOperand::MO_ShuffleMask: { unsigned TiedOperandIdx = 0; if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index a2abe71a6bd7..e634a2b284c3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -62,7 +62,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { /* HashConstantPoolIndices */ true, /* HashMemOperands */ true); assert(Hash && "Expected non-zero Hash"); - return std::to_string(Hash).substr(0, 5); + OS << format_hex_no_prefix(Hash, 16, true); + return OS.str(); } // Gets a hashable artifact from a given MachineOperand (ie an unsigned). @@ -76,7 +77,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { MO.getType(), MO.getTargetFlags(), MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); case MachineOperand::MO_Register: - if (Register::isVirtualRegister(MO.getReg())) + if (MO.getReg().isVirtual()) return MRI.getVRegDef(MO.getReg())->getOpcode(); return MO.getReg(); case MachineOperand::MO_Immediate: @@ -112,6 +113,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { case MachineOperand::MO_Metadata: case MachineOperand::MO_MCSymbol: case MachineOperand::MO_ShuffleMask: + case MachineOperand::MO_DbgInstrRef: return 0; } llvm_unreachable("Unexpected MachineOperandType."); @@ -132,7 +134,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { } auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end()); - return std::to_string(HashMI).substr(0, 5); + OS << format_hex_no_prefix(HashMI, 16, true); + return OS.str(); } unsigned VRegRenamer::createVirtualRegister(unsigned VReg) { @@ -153,7 +156,7 @@ bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) { // Look for instructions that define VRegs in operand 0. 
MachineOperand &MO = Candidate.getOperand(0); // Avoid non regs, instructions defining physical regs. - if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; VRegs.push_back( NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate))); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index d21d552227cf..5cc8ad3d609e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -15,10 +15,12 @@ #include "RegAllocGreedy.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/TensorSpec.h" -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE) #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" +#include "llvm/Analysis/Utils/TrainingLogger.h" #endif +#include "MLRegallocEvictAdvisor.h" #include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveRegMatrix.h" @@ -51,7 +53,7 @@ using CompiledModelType = NoopSavedModelImpl; #endif // Options that only make sense in development mode -#ifdef LLVM_HAVE_TF_API +#ifdef LLVM_HAVE_TFLITE #include "RegAllocScore.h" #include "llvm/Analysis/Utils/TFUtils.h" @@ -63,7 +65,14 @@ static cl::opt<std::string> ModelUnderTraining( "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); -#endif // #ifdef LLVM_HAVE_TF_API +static cl::opt<bool> EnableDevelopmentFeatures( + "regalloc-enable-development-features", cl::Hidden, + cl::desc("Whether or not to enable features under development for the ML " + "regalloc advisor")); + +#else +static const bool EnableDevelopmentFeatures = false; +#endif // #ifdef LLVM_HAVE_TFLITE extern cl::opt<unsigned> EvictInterferenceCutoff; @@ -89,6 +98,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); + AU.addRequired<RegAllocPriorityAdvisorAnalysis>(); AU.addRequired<MachineBlockFrequencyInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -109,20 +119,9 @@ INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass", // Common ML Advisor declarations // =================================== namespace { -// This is the maximum number of interfererring ranges. That's the number of -// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize. -// For X86, that's 32. -// TODO: find a way to get this, statically, in a programmatic way. -static const int64_t MaxInterferences = 32; - -// Logically, we can think of the feature set given to the evaluator as a 2D -// matrix. The rows are the features (see next). The columns correspond to the -// interferences. We treat the candidate virt reg as an 'interference', too, as -// its feature set is the same as that of the interferring ranges. So we'll have -// MaxInterferences + 1 columns and by convention, we will use the last column -// for the virt reg seeking allocation. -static const int64_t CandidateVirtRegPos = MaxInterferences; -static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; +// The model can only accept a specified number of opcodes and will error it if +// fed an opcode it hasn't seen before. This constant sets the current cutoff. 
+static const int OpcodeValueCutoff = 17716; // Most features are as described above, so we'll reuse this vector in defining // them. @@ -192,25 +191,48 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences}; "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") -// The model learns to pick one of the mask == 1 interferences. This is the name -// of the output tensor. -// The contract with the model is that the output will be guaranteed to be to a -// mask == 1 position. -// Using a macro here to avoid 'not used' warnings (and keep cond compilation to -// a minimum) +#ifdef LLVM_HAVE_TFLITE +#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \ + M(int64_t, instructions, InstructionsShape, \ + "Opcodes of the instructions covered by the eviction problem") + +#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \ + M(int64_t, instructions_mapping, InstructionsMappingShape, \ + "A binary matrix mapping LRs to instruction opcodes") \ + M(float, mbb_frequencies, MBBFrequencyShape, \ + "A vector of machine basic block frequencies") \ + M(int64_t, mbb_mapping, InstructionsShape, \ + "A vector of indicies mapping instructions to MBBs") +#else +#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) +#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) +#endif + +// The model learns to pick one of the mask == 1 interferences. This is the +// name of the output tensor. The contract with the model is that the output +// will be guaranteed to be to a mask == 1 position. Using a macro here to +// avoid 'not used' warnings (and keep cond compilation to a minimum) #define DecisionName "index_to_evict" // Named features index. enum FeatureIDs { -#define _FEATURE_IDX(_, name, __, ___) name, - RA_EVICT_FEATURES_LIST(_FEATURE_IDX) +#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name +#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D), + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, +#ifdef LLVM_HAVE_TFLITE + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount, +#else + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX) +#endif // #ifdef LLVM_HAVE_TFLITE + RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount #undef _FEATURE_IDX - FeatureCount +#undef _FEATURE_IDX_SIMPLE }; // The ML advisor will typically have a sparse input to the evaluator, because // various phys regs won't be available. It's easier (maintenance-wise) to -// bulk-reset the state of the evaluator each time we are about to use it again. +// bulk-reset the state of the evaluator each time we are about to use it +// again. template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) { size_t Ret = sizeof(T); for (const auto V : Shape) @@ -223,11 +245,15 @@ void resetInputs(MLModelRunner &Runner) { std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ getTotalSize<TYPE>(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) + if (EnableDevelopmentFeatures) { + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET) #undef _RESET + } } -// Per-live interval components that get aggregated into the feature values that -// will be passed to the evaluator. +// Per-live interval components that get aggregated into the feature values +// that will be passed to the evaluator. 
struct LIFeatureComponents { double R = 0; double W = 0; @@ -241,7 +267,8 @@ struct LIFeatureComponents { using CandidateRegList = std::array<std::pair<MCRegister, bool>, NumberOfInterferences>; -using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>; +using FeaturesListNormalizer = + llvm::SmallVector<float, FeatureIDs::FeatureCount>; /// The ML evictor (commonalities between release and development mode) class MLEvictAdvisor : public RegAllocEvictionAdvisor { @@ -259,10 +286,10 @@ protected: // error, and we shouldn't be asking for it here. const MLModelRunner &getRunner() const { return *Runner; } - /// This just calls Evaluate on the Runner, but in the development mode case, - /// if we're just capturing the log of the default advisor, it needs to call - /// the latter instead, so we need to pass all the necessary parameters for - /// it. In the development case, it will also log. + /// This just calls Evaluate on the Runner, but in the development mode + /// case, if we're just capturing the log of the default advisor, it needs + /// to call the latter instead, so we need to pass all the necessary + /// parameters for it. In the development case, it will also log. virtual int64_t tryFindEvictionCandidatePosition(const LiveInterval &VirtReg, const AllocationOrder &Order, @@ -274,8 +301,8 @@ protected: bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, - std::array<float, FeatureIDs::FeatureCount> &Largest, - size_t Pos) const; + llvm::SmallVectorImpl<float> &Largest, size_t Pos, + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -286,11 +313,12 @@ private: const SmallVirtRegSet &FixedRegisters) const override; void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals, - std::array<float, FeatureIDs::FeatureCount> &Largest, - size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, - float NrUrgent) const; + llvm::SmallVectorImpl<float> &Largest, size_t Pos, + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const; - // Point-in-time: we didn't learn this, so we always delegate to the default. + // Point-in-time: we didn't learn this, so we always delegate to the + // default. bool canEvictHintInterference( const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const override { @@ -302,9 +330,9 @@ private: getLIFeatureComponents(const LiveInterval &LI) const; // Hold on to a default advisor for: - // 1) the implementation of canEvictHintInterference, because we didn't learn - // that nuance yet; - // 2) for bootstrapping (logging) in the development mode case. + // 1) the implementation of canEvictHintInterference, because we didn't + // learn that nuance yet; 2) for bootstrapping (logging) in the development + // mode case. 
const DefaultEvictionAdvisor DefaultAdvisor; MLModelRunner *const Runner; const MachineBlockFrequencyInfo &MBFI; @@ -322,10 +350,6 @@ private: #define _DECL_FEATURES(type, name, shape, _) \ TensorSpec::createSpec<type>(#name, shape), -static const std::vector<TensorSpec> InputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}, -}; -#undef _DECL_FEATURES // =================================== // Release (AOT) - specifics // =================================== @@ -333,13 +357,23 @@ class ReleaseModeEvictionAdvisorAnalysis final : public RegAllocEvictionAdvisorAnalysis { public: ReleaseModeEvictionAdvisorAnalysis() - : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {} + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) { + if (EnableDevelopmentFeatures) { + InputFeatures = {RA_EVICT_FEATURES_LIST( + _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } + } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { return R->getAdvisorMode() == AdvisorMode::Release; } private: + std::vector<TensorSpec> InputFeatures; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineLoopInfo>(); @@ -363,25 +397,18 @@ private: // =================================== // // Features we log -#ifdef LLVM_HAVE_TF_API +#ifdef LLVM_HAVE_TFLITE static const TensorSpec Output = TensorSpec::createSpec<int64_t>(DecisionName, {1}); static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1}); // Features we bind on the model. The tensor names have a prefix, and we also -// need to include some tensors that are expected to be present by the training -// algo. +// need to include some tensors that are expected to be present by the +// training algo. // TODO: can we just get rid of these? 
#define _DECL_TRAIN_FEATURES(type, name, shape, _) \ TensorSpec::createSpec<type>(std::string("action_") + #name, shape), -static const std::vector<TensorSpec> TrainingInputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec<float>("action_discount", {1}), - TensorSpec::createSpec<int32_t>("action_step_type", {1}), - TensorSpec::createSpec<float>("action_reward", {1})}}; -#undef _DECL_TRAIN_FEATURES - class DevelopmentModeEvictAdvisor : public MLEvictAdvisor { public: DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, @@ -403,30 +430,74 @@ class DevelopmentModeEvictionAdvisorAnalysis final : public RegAllocEvictionAdvisorAnalysis { public: DevelopmentModeEvictionAdvisorAnalysis() - : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {} + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) { + if (EnableDevelopmentFeatures) { + InputFeatures = {RA_EVICT_FEATURES_LIST( + _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}; + } + } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { return R->getAdvisorMode() == AdvisorMode::Development; } - /// get the logger for the given function, or nullptr if we didn't collect - /// one. This is used to inject the score by the RegAllocScoring pass. - Logger *getLogger(const MachineFunction &MF) const { - auto I = LogMap.find(MF.getName()); - if (I == LogMap.end()) - return nullptr; - return I->second.get(); + void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref<float()> GetReward) override { + if (!Log) + return; + // The function pass manager would run all the function passes for a + // function, so we assume the last context belongs to this function. If + // this invariant ever changes, we can implement at that time switching + // contexts. At this point, it'd be an error + if (Log->currentContext() != MF.getName()) { + MF.getFunction().getContext().emitError( + "The training log context shouldn't have had changed."); + } + if (Log->hasObservationInProgress()) + Log->logReward<float>(GetReward()); } private: + std::vector<TensorSpec> InputFeatures; + std::vector<TensorSpec> TrainingInputFeatures; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineLoopInfo>(); RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU); } - // Save all the logs (when requested). 
- bool doFinalization(Module &M) override { + bool doInitialization(Module &M) override { + LLVMContext &Ctx = M.getContext(); + if (ModelUnderTraining.empty() && TrainingLog.empty()) { + Ctx.emitError("Regalloc development mode should be requested with at " + "least logging enabled and/or a training model"); + return false; + } + if (ModelUnderTraining.empty()) + Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures); + else + Runner = ModelUnderTrainingRunner::createAndEnsureValid( + Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures); + if (!Runner) { + Ctx.emitError("Regalloc: could not set up the model runner"); + return false; + } if (TrainingLog.empty()) return false; std::error_code EC; @@ -435,57 +506,35 @@ private: M.getContext().emitError(EC.message() + ":" + TrainingLog); return false; } - Logger::flushLogs(*OS, LogMap); + std::vector<TensorSpec> LFS = InputFeatures; + if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get())) + append_range(LFS, MUTR->extraOutputsForLoggingSpecs()); + // We always log the output; in particular, if we're not evaluating, we + // don't have an output spec json file. That's why we handle the + // 'normal' output separately. + LFS.push_back(Output); + + Log = std::make_unique<Logger>(std::move(OS), LFS, Reward, + /*IncludeReward*/ true); return false; } std::unique_ptr<RegAllocEvictionAdvisor> getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { - LLVMContext &Ctx = MF.getFunction().getContext(); - if (ModelUnderTraining.empty() && TrainingLog.empty()) { - Ctx.emitError("Regalloc development mode should be requested with at " - "least logging enabled and/or a training model"); + if (!Runner) return nullptr; - } - if (!Runner) { - if (ModelUnderTraining.empty()) - Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures); - else - Runner = ModelUnderTrainingRunner::createAndEnsureValid( - Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures); - if (!Runner) { - Ctx.emitError("Regalloc: could not set up the model runner"); - return nullptr; - } - } - - Logger *Log = nullptr; - if (!TrainingLog.empty()) { - std::vector<LoggedFeatureSpec> LFS; - for (const auto &FS : InputFeatures) - LFS.push_back({FS, None}); - if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get())) - if (MUTR->outputLoggedFeatureSpecs().size() > 1) - append_range(LFS, drop_begin(MUTR->outputLoggedFeatureSpecs())); - // We always log the output; in particular, if we're not evaluating, we - // don't have an output spec json file. That's why we handle the - // 'normal' output separately. 
- LFS.push_back({Output, None}); - auto I = LogMap.insert(std::make_pair( - MF.getFunction().getName(), - std::make_unique<Logger>(LFS, Reward, /*IncludeReward*/ true))); - assert(I.second); - Log = I.first->second.get(); - } + if (Log) + Log->switchContext(MF.getName()); return std::make_unique<DevelopmentModeEvictAdvisor>( MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(), - getAnalysis<MachineLoopInfo>(), Log); + getAnalysis<MachineLoopInfo>(), Log.get()); } std::unique_ptr<MLModelRunner> Runner; - StringMap<std::unique_ptr<Logger>> LogMap; + std::unique_ptr<Logger> Log; }; -#endif //#ifdef LLVM_HAVE_TF_API + +#endif //#ifdef LLVM_HAVE_TFLITE } // namespace float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { @@ -528,8 +577,9 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition( bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, - const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest, - size_t Pos) const { + const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl<float> &Largest, size_t Pos, + llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -546,8 +596,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures( SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - // Different from the default heuristic, we don't make any assumptions about - // what having more than 10 results in the query may mean. + // Different from the default heuristic, we don't make any assumptions + // about what having more than 10 results in the query may mean. const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff); if (IFIntervals.empty() && InterferingIntervals.empty()) continue; @@ -555,7 +605,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures( return false; InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end()); for (const LiveInterval *Intf : reverse(IFIntervals)) { - assert(Register::isVirtualRegister(Intf->reg()) && + assert(Intf->reg().isVirtual() && "Only expecting virtual register interference from query"); // This is the same set of legality checks as in the default case: don't // try to evict fixed regs or 'done' ones. Also don't break cascades, @@ -588,7 +638,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures( // OK, so if we made it this far, this LR is an eviction candidate, load its // features. extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs, - NrUrgent); + NrUrgent, LRPosInfo); return true; } @@ -604,14 +654,14 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( // max<uint8_t>, then any of the costs of the legally-evictable intervals // would be lower. When that happens, one of those will be selected. // Therefore, we allow the candidate be selected, unless the candidate is - // unspillable, in which case it would be incorrect to not find a register for - // it. + // unspillable, in which case it would be incorrect to not find a register + // for it. const bool MustFindEviction = (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u)); // Number of available candidates - if 0, no need to continue. 
size_t Available = 0; - // Make sure we don't have leftover partial state from an attempt where we had - // no available candidates and bailed out early. + // Make sure we don't have leftover partial state from an attempt where we + // had no available candidates and bailed out early. resetInputs(*Runner); // Track the index->register mapping because AllocationOrder doesn't do that @@ -624,16 +674,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( // only normalize (some of) the float features, but it's just simpler to // dimension 'Largest' to all the features, especially since we have the // 'DoNotNormalize' list. - FeaturesListNormalizer Largest; - Largest.fill(0.0); - - // Same overal idea as in the default eviction policy - we visit the values of - // AllocationOrder one at a time. If it's not legally available, we mask off - // the corresponding feature column (==do nothing because we already reset all - // the features to 0) - // Use Pos to capture the column we load features at - in AllocationOrder - // order. + FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0); + + // Same overal idea as in the default eviction policy - we visit the values + // of AllocationOrder one at a time. If it's not legally available, we mask + // off the corresponding feature column (==do nothing because we already + // reset all the features to 0) Use Pos to capture the column we load + // features at - in AllocationOrder order. size_t Pos = 0; + SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { MCRegister PhysReg = *I; @@ -643,7 +692,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( continue; } if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, - Largest, Pos)) { + Largest, Pos, LRPosInfo)) { ++Available; Regs[Pos] = std::make_pair(PhysReg, true); } @@ -659,10 +708,39 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( Regs[CandidateVirtRegPos].second = !MustFindEviction; if (!MustFindEviction) extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest, - CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, - /*NrUrgent*/ 0.0); + CandidateVirtRegPos, /*IsHint*/ 0, + /*LocalIntfsCount*/ 0, + /*NrUrgent*/ 0.0, LRPosInfo); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); +#ifdef LLVM_HAVE_TFLITE + if (EnableDevelopmentFeatures) { + extractInstructionFeatures( + LRPosInfo, Runner, + [this](SlotIndex InputIndex) -> int { + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(InputIndex); + if (!CurrentMachineInstruction) { + return -1; + } + return CurrentMachineInstruction->getOpcode(); + }, + [this](SlotIndex InputIndex) -> float { + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(InputIndex); + return MBFI.getBlockFreqRelativeToEntryBlock( + CurrentMachineInstruction->getParent()); + }, + [this](SlotIndex InputIndex) -> MachineBasicBlock * { + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(InputIndex); + return CurrentMachineInstruction->getParent(); + }, + FeatureIDs::instructions, FeatureIDs::instructions_mapping, + FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping, + LIS->getSlotIndexes()->getLastIndex()); + } +#endif // #ifdef LLVM_HAVE_TFLITE // Normalize the features. for (auto &V : Largest) V = V ? 
V : 1.0; @@ -746,8 +824,9 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const { // of accummulating the various features, we keep them separate. void MLEvictAdvisor::extractFeatures( const SmallVectorImpl<const LiveInterval *> &Intervals, - std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { + llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint, + int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -794,6 +873,13 @@ void MLEvictAdvisor::extractFeatures( HintWeights += LIFC.HintWeights; NrRematerializable += LIFC.IsRemat; + + if (EnableDevelopmentFeatures) { + for (auto CurrentSegment : LI) { + LRPosInfo.push_back( + LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos}); + } + } } size_t Size = 0; if (!Intervals.empty()) { @@ -836,8 +922,143 @@ void MLEvictAdvisor::extractFeatures( #undef SET } +void extractInstructionFeatures( + SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner, + function_ref<int(SlotIndex)> GetOpcode, + function_ref<float(SlotIndex)> GetMBBFreq, + function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference, + const int InstructionsIndex, const int InstructionsMappingIndex, + const int MBBFreqIndex, const int MBBMappingIndex, + const SlotIndex LastIndex) { + // This function extracts instruction based features relevant to the eviction + // problem currently being solved. This function ends up extracting two + // tensors. + // 1 - A vector of size max instruction count. It contains the opcodes of the + // instructions spanned by all the intervals in the current instance of the + // eviction problem. + // 2 - A binary mapping matrix of size (LR count * max + // instruction count) which maps where the LRs are live to the actual opcodes + // for which they are live. + // 3 - A vector of size max supported MBB count storing MBB frequencies, + // encompassing all of the MBBs covered by the eviction problem. + // 4 - A vector of size max instruction count of indices to members of the MBB + // frequency vector, mapping each instruction to its associated MBB. + + // Start off by sorting the segments based on the beginning slot index. + std::sort( + LRPosInfo.begin(), LRPosInfo.end(), + [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; }); + size_t InstructionIndex = 0; + size_t CurrentSegmentIndex = 0; + SlotIndex CurrentIndex = LRPosInfo[0].Begin; + std::map<MachineBasicBlock *, size_t> VisitedMBBs; + size_t CurrentMBBIndex = 0; + // This loop processes all the segments sequentially by starting at the + // beginning slot index of the first segment, iterating through all the slot + // indices before the end slot index of that segment (while checking for + // overlaps with segments that start at greater slot indices). After hitting + // that end index, the current segment being processed gets bumped until they + // are all processed or the max instruction count is hit, where everything is + // just truncated. + while (true) { + // If the index that we are currently at is within the current segment and + // we haven't hit the max instruction count, continue processing the current + // segment. 
+ while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End && + InstructionIndex < ModelMaxSupportedInstructionCount) { + int CurrentOpcode = GetOpcode(CurrentIndex); + // If the current machine instruction is null, skip it + if (CurrentOpcode == -1) { + // If we're currently at the last index in the SlotIndex analysis, + // we can't go any further, so return from the function + if (CurrentIndex >= LastIndex) { + return; + } + CurrentIndex = CurrentIndex.getNextIndex(); + continue; + } + MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex); + if (VisitedMBBs.count(CurrentMBBReference) == 0) { + VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex; + ++CurrentMBBIndex; + } + extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs, + GetMBBFreq, CurrentMBBReference, RegallocRunner, + MBBFreqIndex, MBBMappingIndex); + // Current code assumes we're not going to get any disjointed segments + assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex); + RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] = + CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0; + // set value in the binary mapping matrix for the current instruction + auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos; + RegallocRunner->getTensor<int64_t>( + InstructionsMappingIndex)[CurrentSegmentPosition * + ModelMaxSupportedInstructionCount + + InstructionIndex] = 1; + // All of the segments are sorted based on the beginning slot index, but + // this doesn't mean that the beginning slot index of the next segment is + // after the end segment of the one being currently processed. This while + // loop checks for overlapping segments and modifies the portion of the + // column in the mapping matrix for the currently processed instruction + // for the LR it is checking. Also make sure that the beginning of the + // current segment we're checking for overlap in is less than the current + // index, otherwise we're done checking overlaps. + size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1; + while (OverlapCheckCurrentSegment < LRPosInfo.size() && + LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) { + auto OverlapCurrentSegmentPosition = + LRPosInfo[OverlapCheckCurrentSegment].Pos; + if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) { + RegallocRunner->getTensor<int64_t>( + InstructionsMappingIndex)[OverlapCurrentSegmentPosition * + ModelMaxSupportedInstructionCount + + InstructionIndex] = 1; + } + ++OverlapCheckCurrentSegment; + } + ++InstructionIndex; + if (CurrentIndex >= LastIndex) { + return; + } + CurrentIndex = CurrentIndex.getNextIndex(); + } + // if we've just finished processing through the last segment or if we've + // hit the maximum number of instructions, break out of the loop. + if (CurrentSegmentIndex == LRPosInfo.size() - 1 || + InstructionIndex >= ModelMaxSupportedInstructionCount) { + break; + } + // If the segments are not overlapping, we need to move to the beginning + // index of the next segment to avoid having instructions not attached to + // any register. 
+ if (LRPosInfo[CurrentSegmentIndex + 1].Begin > + LRPosInfo[CurrentSegmentIndex].End) { + CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin; + } + ++CurrentSegmentIndex; + } +} + +void extractMBBFrequency(const SlotIndex CurrentIndex, + const size_t CurrentInstructionIndex, + std::map<MachineBasicBlock *, size_t> &VisitedMBBs, + function_ref<float(SlotIndex)> GetMBBFreq, + MachineBasicBlock *CurrentMBBReference, + MLModelRunner *RegallocRunner, const int MBBFreqIndex, + const int MBBMappingIndex) { + size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference]; + float CurrentMBBFreq = GetMBBFreq(CurrentIndex); + if (CurrentMBBIndex < ModelMaxSupportedMBBCount) { + RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] = + CurrentMBBFreq; + RegallocRunner->getTensor<int64_t>( + MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex; + } +} + // Development mode-specific implementations -#ifdef LLVM_HAVE_TF_API +#ifdef LLVM_HAVE_TFLITE + RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { return new DevelopmentModeEvictionAdvisorAnalysis(); } @@ -853,9 +1074,9 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition( } else { MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate( VirtReg, Order, CostPerUseLimit, FixedRegisters); - // Find the index of the selected PhysReg. We need it for logging, otherwise - // this is wasted cycles (but so would starting development mode without a - // model nor logging) + // Find the index of the selected PhysReg. We need it for logging, + // otherwise this is wasted cycles (but so would starting development mode + // without a model nor logging) if (!PhysReg) Ret = CandidateVirtRegPos; else @@ -866,41 +1087,57 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition( } if (TrainingLog.empty()) return Ret; + // TODO(mtrofin): when we support optional rewards, this can go away. In the + // meantime, we log the "pretend" reward (0) for the previous observation + // before starting a new one. + if (Log->hasObservationInProgress()) + Log->logReward<float>(0.0); + + Log->startObservation(); size_t CurrentFeature = 0; - for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) { - Log->logSpecifiedTensorValue( - CurrentFeature, reinterpret_cast<const char *>( + size_t FeatureCount = EnableDevelopmentFeatures + ? 
FeatureIDs::FeaturesWithDevelopmentCount + : FeatureIDs::FeatureCount; + for (; CurrentFeature < FeatureCount; ++CurrentFeature) { + Log->logTensorValue(CurrentFeature, + reinterpret_cast<const char *>( getRunner().getTensorUntyped(CurrentFeature))); } if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner())) - for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size(); + for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I, ++CurrentFeature) - Log->logSpecifiedTensorValue( + Log->logTensorValue( CurrentFeature, - reinterpret_cast<const char *>( - MUTR->lastEvaluationResult()->getUntypedTensorValue(I))); + reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I))); // The output is right after the features and the extra outputs - Log->logInt64Value(CurrentFeature, &Ret); + Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret)); + Log->endObservation(); return Ret; } bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { - if (auto *DevModeAnalysis = dyn_cast<DevelopmentModeEvictionAdvisorAnalysis>( - &getAnalysis<RegAllocEvictionAdvisorAnalysis>())) - if (auto *Log = DevModeAnalysis->getLogger(MF)) - Log->logFloatFinalReward(static_cast<float>( + std::optional<float> CachedReward; + auto GetReward = [&]() { + if (!CachedReward) + CachedReward = static_cast<float>( calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>()) - .getScore())); - + .getScore()); + return *CachedReward; + }; + + getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF, + GetReward); + getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF, + GetReward); return false; } -#endif // #ifdef LLVM_HAVE_TF_API +#endif // #ifdef LLVM_HAVE_TFLITE RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() { return new ReleaseModeEvictionAdvisorAnalysis(); } // In all cases except development mode, we don't need scoring. -#if !defined(LLVM_HAVE_TF_API) +#if !defined(LLVM_HAVE_TFLITE) bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; } #endif diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h new file mode 100644 index 000000000000..e36a41154096 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h @@ -0,0 +1,93 @@ +//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Function declarations of utilities related to feature extraction for unit +// testing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H +#define LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H + +#include "llvm/Analysis/MLModelRunner.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SlotIndexes.h" + +using namespace llvm; + +// LRStartEndInfo contains the start and end of a specific live range as +// slot indices as well as storing the index of the physical register it +// is assigned to (or 1 above the phys reg count if its the candidate). +// Used when extracting per-instruction features in the context of a +// specific eviction problem. 
+struct LRStartEndInfo { + SlotIndex Begin; + SlotIndex End; + size_t Pos = 0; +}; + +void extractInstructionFeatures( + llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo, + MLModelRunner *RegallocRunner, function_ref<int(SlotIndex)> GetOpcode, + function_ref<float(SlotIndex)> GetMBBFreq, + function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference, + const int InstructionsIndex, const int InstructionsMappingIndex, + const int MBBFreqIndex, const int MBBMappingIndex, + const SlotIndex LastIndex); + +void extractMBBFrequency(const SlotIndex CurrentIndex, + const size_t CurrentInstructionIndex, + std::map<MachineBasicBlock *, size_t> &VisitedMBBs, + function_ref<float(SlotIndex)> GetMBBFreq, + MachineBasicBlock *CurrentMBBReference, + MLModelRunner *RegallocRunner, const int MBBFreqIndex, + const int MBBMappingIndex); + +// This is the maximum number of interfererring ranges. That's the number of +// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize. +// For X86, that's 32. +// TODO: find a way to get this, statically, in a programmatic way. +static const int64_t MaxInterferences = 32; + +// Logically, we can think of the feature set given to the evaluator as a 2D +// matrix. The rows are the features (see next). The columns correspond to the +// interferences. We treat the candidate virt reg as an 'interference', too, as +// its feature set is the same as that of the interferring ranges. So we'll have +// MaxInterferences + 1 columns and by convention, we will use the last column +// for the virt reg seeking allocation. +static const int64_t CandidateVirtRegPos = MaxInterferences; +static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; + +// The number of instructions that a specific live range might have is variable, +// but we're passing in a single matrix of instructions and tensorflow saved +// models only support a fixed input size, so we have to cap the number of +// instructions that can be passed along. The specific value was derived from +// experimentation such that the majority of eviction problems would be +// completely covered. +static const int ModelMaxSupportedInstructionCount = 300; + +// When extracting per-instruction features, the advisor will currently create +// a vector of size ModelMaxSupportedInstructionCount to hold the opcodes of the +// instructions relevant to the eviction problem, and a NumberOfInterferences * +// ModelMaxSupportedInstructionCount matrix that maps LRs to the instructions +// that they span. +static const std::vector<int64_t> InstructionsShape{ + 1, ModelMaxSupportedInstructionCount}; +static const std::vector<int64_t> InstructionsMappingShape{ + 1, NumberOfInterferences, ModelMaxSupportedInstructionCount}; + +// When extracting mappings between MBBs and individual instructions, we create +// a vector of MBB frequencies, currently of size 100, which was a value +// determined through experimentation to encompass the vast majority of eviction +// problems. The actual mapping is the same shape as the instruction opcodes +// vector. 
+static const int64_t ModelMaxSupportedMBBCount = 100; +static const std::vector<int64_t> MBBFrequencyShape{1, + ModelMaxSupportedMBBCount}; + +#endif // LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp new file mode 100644 index 000000000000..320a184bdcc5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp @@ -0,0 +1,335 @@ +//===- MLRegAllocPriorityAdvisor.cpp - ML priority advisor-----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the ML priority advisor and reward injection pass +// +//===----------------------------------------------------------------------===// + +#include "AllocationOrder.h" +#include "RegAllocGreedy.h" +#include "RegAllocPriorityAdvisor.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/ReleaseModeModelRunner.h" +#include "llvm/Analysis/TensorSpec.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CommandLine.h" + +#if defined(LLVM_HAVE_TFLITE) +#include "llvm/Analysis/ModelUnderTrainingRunner.h" +#include "llvm/Analysis/NoInferenceModelRunner.h" +#include "llvm/Analysis/Utils/TrainingLogger.h" +#endif + +using namespace llvm; + +// Options that only make sense in development mode +#ifdef LLVM_HAVE_TFLITE +#include "RegAllocScore.h" +#include "llvm/Analysis/Utils/TFUtils.h" + +static cl::opt<std::string> TrainingLog( + "regalloc-priority-training-log", cl::Hidden, + cl::desc("Training log for the register allocator priority model")); + +static cl::opt<std::string> ModelUnderTraining( + "regalloc-priority-model", cl::Hidden, + cl::desc("The model being trained for register allocation priority")); + +#endif // #ifdef LLVM_HAVE_TFLITE + +namespace llvm { + +static const std::vector<int64_t> PerLiveRangeShape{1}; + +#define RA_PRIORITY_FEATURES_LIST(M) \ + M(int64_t, li_size, PerLiveRangeShape, "size") \ + M(int64_t, stage, PerLiveRangeShape, "stage") \ + M(float, weight, PerLiveRangeShape, "weight") + +#define DecisionName "priority" + +// Named features index. 
+enum FeatureIDs { +#define _FEATURE_IDX(_, name, __, ___) name, + RA_PRIORITY_FEATURES_LIST(_FEATURE_IDX) +#undef _FEATURE_IDX + FeatureCount +}; + +class MLPriorityAdvisor : public RegAllocPriorityAdvisor { +public: + MLPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *const Indexes, MLModelRunner *Runner); + +protected: + const RegAllocPriorityAdvisor &getDefaultAdvisor() const { + return static_cast<const RegAllocPriorityAdvisor &>(DefaultAdvisor); + } + + // The assumption is that if the Runner could not be constructed, we emit-ed + // error, and we shouldn't be asking for it here. + const MLModelRunner &getRunner() const { return *Runner; } + float getPriorityImpl(const LiveInterval &LI) const; + unsigned getPriority(const LiveInterval &LI) const override; + +private: + const DefaultPriorityAdvisor DefaultAdvisor; + MLModelRunner *const Runner; +}; + +#define _DECL_FEATURES(type, name, shape, _) \ + TensorSpec::createSpec<type>(#name, shape), + +static const std::vector<TensorSpec> InputFeatures{ + {RA_PRIORITY_FEATURES_LIST(_DECL_FEATURES)}, +}; +#undef _DECL_FEATURES + +// =================================== +// Release (AOT) - specifics +// =================================== +class ReleaseModePriorityAdvisorAnalysis final + : public RegAllocPriorityAdvisorAnalysis { +public: + ReleaseModePriorityAdvisorAnalysis() + : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Release) {} + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Release; + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<SlotIndexes>(); + RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); + } + + std::unique_ptr<RegAllocPriorityAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + if (!Runner) + Runner = std::make_unique<ReleaseModeModelRunner<NoopSavedModelImpl>>( + MF.getFunction().getContext(), InputFeatures, DecisionName); + return std::make_unique<MLPriorityAdvisor>( + MF, RA, &getAnalysis<SlotIndexes>(), Runner.get()); + } + std::unique_ptr<ReleaseModeModelRunner<NoopSavedModelImpl>> Runner; +}; + +// =================================== +// Development mode-specifics +// =================================== +// +// Features we log +#ifdef LLVM_HAVE_TFLITE + +static const TensorSpec Output = + TensorSpec::createSpec<float>(DecisionName, {1}); +static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1}); + +#define _DECL_TRAIN_FEATURES(type, name, shape, _) \ + TensorSpec::createSpec<type>(std::string("action_") + #name, shape), + +static const std::vector<TensorSpec> TrainingInputFeatures{ + {RA_PRIORITY_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}}; +#undef _DECL_TRAIN_FEATURES + +class DevelopmentModePriorityAdvisor : public MLPriorityAdvisor { +public: + DevelopmentModePriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *const Indexes, + MLModelRunner *Runner, Logger *Log) + : MLPriorityAdvisor(MF, RA, Indexes, Runner), Log(Log) {} + +private: + unsigned getPriority(const LiveInterval &LI) const override; + Logger *const Log; +}; + +class DevelopmentModePriorityAdvisorAnalysis final + : public RegAllocPriorityAdvisorAnalysis { +public: + DevelopmentModePriorityAdvisorAnalysis() + : 
RegAllocPriorityAdvisorAnalysis(AdvisorMode::Development) {} + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Development; + } + + void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref<float()> GetReward) override { + if (!Log) + return; + // The function pass manager would run all the function passes for a + // function, so we assume the last context belongs to this function. If + // this invariant ever changes, we can implement at that time switching + // contexts. At this point, it'd be an error + if (Log->currentContext() != MF.getName()) { + MF.getFunction().getContext().emitError( + "The training log context shouldn't have had changed."); + } + if (Log->hasObservationInProgress()) + Log->logReward<float>(GetReward()); + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<SlotIndexes>(); + RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); + } + + // Save all the logs (when requested). + bool doInitialization(Module &M) override { + LLVMContext &Ctx = M.getContext(); + if (ModelUnderTraining.empty() && TrainingLog.empty()) { + Ctx.emitError("Regalloc development mode should be requested with at " + "least logging enabled and/or a training model"); + return false; + } + if (ModelUnderTraining.empty()) + Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures); + else + Runner = ModelUnderTrainingRunner::createAndEnsureValid( + Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures); + if (!Runner) { + Ctx.emitError("Regalloc: could not set up the model runner"); + return false; + } + if (TrainingLog.empty()) + return false; + std::error_code EC; + auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC); + if (EC) { + M.getContext().emitError(EC.message() + ":" + TrainingLog); + return false; + } + std::vector<TensorSpec> LFS = InputFeatures; + if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get())) + append_range(LFS, MUTR->extraOutputsForLoggingSpecs()); + // We always log the output; in particular, if we're not evaluating, we + // don't have an output spec json file. That's why we handle the + // 'normal' output separately. 
+ LFS.push_back(Output); + + Log = std::make_unique<Logger>(std::move(OS), LFS, Reward, + /*IncludeReward*/ true); + return false; + } + + std::unique_ptr<RegAllocPriorityAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + if (!Runner) + return nullptr; + if (Log) { + Log->switchContext(MF.getName()); + } + + return std::make_unique<DevelopmentModePriorityAdvisor>( + MF, RA, &getAnalysis<SlotIndexes>(), Runner.get(), Log.get()); + } + + std::unique_ptr<MLModelRunner> Runner; + std::unique_ptr<Logger> Log; +}; +#endif //#ifdef LLVM_HAVE_TFLITE + +} // namespace llvm + +RegAllocPriorityAdvisorAnalysis *llvm::createReleaseModePriorityAdvisor() { + return new ReleaseModePriorityAdvisorAnalysis(); +} + +MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF, + const RAGreedy &RA, + SlotIndexes *const Indexes, + MLModelRunner *Runner) + : RegAllocPriorityAdvisor(MF, RA, Indexes), DefaultAdvisor(MF, RA, Indexes), + Runner(std::move(Runner)) { + assert(this->Runner); +} + +float MLPriorityAdvisor::getPriorityImpl(const LiveInterval &LI) const { + const unsigned Size = LI.getSize(); + LiveRangeStage Stage = RA.getExtraInfo().getStage(LI); + + *Runner->getTensor<int64_t>(0) = static_cast<int64_t>(Size); + *Runner->getTensor<int64_t>(1) = static_cast<int64_t>(Stage); + *Runner->getTensor<float>(2) = static_cast<float>(LI.weight()); + + return Runner->evaluate<float>(); +} + +unsigned MLPriorityAdvisor::getPriority(const LiveInterval &LI) const { + return static_cast<unsigned>(getPriorityImpl(LI)); +} + +#ifdef LLVM_HAVE_TFLITE +RegAllocPriorityAdvisorAnalysis *llvm::createDevelopmentModePriorityAdvisor() { + return new DevelopmentModePriorityAdvisorAnalysis(); +} + +unsigned +DevelopmentModePriorityAdvisor::getPriority(const LiveInterval &LI) const { + double Prio = 0; + + if (isa<ModelUnderTrainingRunner>(getRunner())) { + Prio = MLPriorityAdvisor::getPriorityImpl(LI); + } else { + Prio = getDefaultAdvisor().getPriority(LI); + } + + if (TrainingLog.empty()) + return Prio; + + // TODO(mtrofin): when we support optional rewards, this can go away. In the + // meantime, we log the "pretend" reward (0) for the previous observation + // before starting a new one. 
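The code that follows is the reason for the TODO above: the trainer wants every observation paired with a reward, but the real reward (the allocation score) is only known once the whole function has been allocated, so each new query first closes the still-open observation with a placeholder reward of 0. The toy logger below sketches that state machine; the struct and its methods are invented for illustration and are not the real TrainingLogger interface.

#include <cstdio>

struct ToyLogger {
  bool RewardPending = false;
  bool hasObservationInProgress() const { return RewardPending; }
  void startObservation() { std::puts("start observation"); }
  void endObservation() { RewardPending = true; } // features logged, reward still owed
  void logReward(float R) {
    std::printf("reward %f\n", R);
    RewardPending = false;
  }
};

// Mirrors the shape of the logging below: close the previous observation with
// a pretend reward, then record the new decision; the true per-function
// reward is attached later by logRewardIfNeeded above.
static void recordPriority(ToyLogger &Log, float Priority) {
  if (Log.hasObservationInProgress())
    Log.logReward(0.0f);
  Log.startObservation();
  std::printf("priority %f\n", Priority); // stands in for the logTensorValue calls
  Log.endObservation();
}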
+ if (Log->hasObservationInProgress()) + Log->logReward<float>(0.0); + + Log->startObservation(); + size_t CurrentFeature = 0; + for (; CurrentFeature < InputFeatures.size(); ++CurrentFeature) { + Log->logTensorValue(CurrentFeature, + reinterpret_cast<const char *>( + getRunner().getTensorUntyped(CurrentFeature))); + } + + if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner())) { + for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); + ++I, ++CurrentFeature) + Log->logTensorValue( + CurrentFeature, + reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I))); + } + + float Ret = static_cast<float>(Prio); + Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret)); + Log->endObservation(); + + return static_cast<unsigned>(Prio); +} + +#endif // #ifdef LLVM_HAVE_TFLITE diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp index 7381c7e6b09c..5ef377f2a1c0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" @@ -34,6 +35,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> +#include <cmath> using namespace llvm; #define DEBUG_TYPE "codegen" @@ -253,6 +255,10 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { return I; } +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminatorForward() { + return find_if(instrs(), [](auto &II) { return II.isTerminator(); }); +} + MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr(bool SkipPseudoOp) { // Skip over begin-of-block dbg_value instructions. @@ -450,8 +456,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (IrrLoopHeaderWeight && IsStandalone) { if (Indexes) OS << '\t'; - OS.indent(2) << "; Irreducible loop header weight: " - << IrrLoopHeaderWeight.value() << '\n'; + OS.indent(2) << "; Irreducible loop header weight: " << *IrrLoopHeaderWeight + << '\n'; } } @@ -476,6 +482,28 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags, os << "bb." 
<< getNumber(); bool hasAttributes = false; + auto PrintBBRef = [&](const BasicBlock *bb) { + os << "%ir-block."; + if (bb->hasName()) { + os << bb->getName(); + } else { + int slot = -1; + + if (moduleSlotTracker) { + slot = moduleSlotTracker->getLocalSlot(bb); + } else if (bb->getParent()) { + ModuleSlotTracker tmpTracker(bb->getModule(), false); + tmpTracker.incorporateFunction(*bb->getParent()); + slot = tmpTracker.getLocalSlot(bb); + } + + if (slot == -1) + os << "<ir-block badref>"; + else + os << slot; + } + }; + if (printNameFlags & PrintNameIr) { if (const auto *bb = getBasicBlock()) { if (bb->hasName()) { @@ -483,29 +511,21 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags, } else { hasAttributes = true; os << " ("; - - int slot = -1; - - if (moduleSlotTracker) { - slot = moduleSlotTracker->getLocalSlot(bb); - } else if (bb->getParent()) { - ModuleSlotTracker tmpTracker(bb->getModule(), false); - tmpTracker.incorporateFunction(*bb->getParent()); - slot = tmpTracker.getLocalSlot(bb); - } - - if (slot == -1) - os << "<ir-block badref>"; - else - os << (Twine("%ir-block.") + Twine(slot)).str(); + PrintBBRef(bb); } } } if (printNameFlags & PrintNameAttributes) { - if (hasAddressTaken()) { + if (isMachineBlockAddressTaken()) { os << (hasAttributes ? ", " : " ("); - os << "address-taken"; + os << "machine-block-address-taken"; + hasAttributes = true; + } + if (isIRBlockAddressTaken()) { + os << (hasAttributes ? ", " : " ("); + os << "ir-block-address-taken "; + PrintBBRef(getAddressTakenIRBlock()); hasAttributes = true; } if (isEHPad()) { @@ -543,6 +563,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags, } hasAttributes = true; } + if (getBBID().has_value()) { + os << (hasAttributes ? ", " : " ("); + os << "bb_id " << *getBBID(); + hasAttributes = true; + } } if (hasAttributes) @@ -919,7 +944,7 @@ const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const { return Successors.size() == 1 ? Successors[0] : nullptr; } -MachineBasicBlock *MachineBasicBlock::getFallThrough() { +MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) { MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. @@ -950,8 +975,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough() { // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. 
- if (MachineFunction::iterator(TBB) == Fallthrough || - MachineFunction::iterator(FBB) == Fallthrough) + if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough)) return &*Fallthrough; // If it's an unconditional branch to some block not the fall through, it @@ -1046,8 +1071,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( MO.isUndef()) continue; Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg) || - LV->getVarInfo(Reg).removeKill(MI)) { + if (Reg.isPhysical() || LV->getVarInfo(Reg).removeKill(MI)) { KilledRegs.push_back(Reg); LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI); MO.setIsKill(false); @@ -1133,7 +1157,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) LV->getVarInfo(Reg).Kills.push_back(&*I); LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I); break; @@ -1631,6 +1655,11 @@ bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const { return false; } +unsigned MachineBasicBlock::getBBIDOrNumber() const { + uint8_t BBAddrMapVersion = getParent()->getContext().getBBAddrMapVersion(); + return BBAddrMapVersion < 2 ? getNumber() : *getBBID(); +} + const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold); const MBBSectionID MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index c569f0350366..b1cbe525d7e6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/iterator.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -23,6 +22,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" +#include <optional> #include <string> using namespace llvm; @@ -231,19 +231,19 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI ? 
MBFI->getBlockFreq(MBB) : 0; } -Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( +std::optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( const MachineBasicBlock *MBB) const { if (!MBFI) - return None; + return std::nullopt; const Function &F = MBFI->getFunction()->getFunction(); return MBFI->getBlockProfileCount(F, MBB); } -Optional<uint64_t> +std::optional<uint64_t> MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { if (!MBFI) - return None; + return std::nullopt; const Function &F = MBFI->getFunction()->getFunction(); return MBFI->getProfileCountFromFreq(F, Freq); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 9ff5c37627b4..7bbc347a8cf8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -201,6 +201,18 @@ static cl::opt<unsigned> TriangleChainCount( cl::init(2), cl::Hidden); +// Use case: When block layout is visualized after MBP pass, the basic blocks +// are labeled in layout order; meanwhile blocks could be numbered in a +// different order. It's hard to map between the graph and pass output. +// With this option on, the basic blocks are renumbered in function layout +// order. For debugging only. +static cl::opt<bool> RenumberBlocksBeforeView( + "renumber-blocks-before-view", + cl::desc( + "If true, basic blocks are re-numbered before MBP layout is printed " + "into a dot graph. Only used when a function is being printed."), + cl::init(false), cl::Hidden); + extern cl::opt<bool> EnableExtTspBlockPlacement; extern cl::opt<bool> ApplyExtTspWithoutProfile; @@ -3466,6 +3478,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || F->getFunction().getName().equals(ViewBlockFreqFuncName))) { + if (RenumberBlocksBeforeView) + MF.RenumberBlocks(); MBFI->view("MBP." + MF.getName(), false); } @@ -3488,7 +3502,7 @@ void MachineBlockPlacement::applyExtTsp() { auto BlockSizes = std::vector<uint64_t>(F->size()); auto BlockCounts = std::vector<uint64_t>(F->size()); - DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> JumpCounts; + std::vector<EdgeCountT> JumpCounts; for (MachineBasicBlock &MBB : *F) { // Getting the block frequency. BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); @@ -3506,9 +3520,9 @@ void MachineBlockPlacement::applyExtTsp() { // Getting jump frequencies. for (MachineBasicBlock *Succ : MBB.successors()) { auto EP = MBPI->getEdgeProbability(&MBB, Succ); - BlockFrequency EdgeFreq = BlockFreq * EP; - auto Edge = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]); - JumpCounts[Edge] = EdgeFreq.getFrequency(); + BlockFrequency JumpFreq = BlockFreq * EP; + auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]); + JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency())); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp new file mode 100644 index 000000000000..7bfb81771380 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp @@ -0,0 +1,95 @@ +//===- MachineCFGPrinter.cpp - DOT Printer for Machine Functions ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// +// This file defines the `-dot-machine-cfg` analysis pass, which emits +// Machine Function in DOT format in file titled `<prefix>.<function-name>.dot. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineCFGPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +#define DEBUG_TYPE "dot-machine-cfg" + +static cl::opt<std::string> + MCFGFuncName("mcfg-func-name", cl::Hidden, + cl::desc("The name of a function (or its substring)" + " whose CFG is viewed/printed.")); + +static cl::opt<std::string> MCFGDotFilenamePrefix( + "mcfg-dot-filename-prefix", cl::Hidden, + cl::desc("The prefix used for the Machine CFG dot file names.")); + +static cl::opt<bool> + CFGOnly("dot-mcfg-only", cl::init(false), cl::Hidden, + cl::desc("Print only the CFG without blocks body")); + +static void writeMCFGToDotFile(MachineFunction &MF) { + std::string Filename = + (MCFGDotFilenamePrefix + "." + MF.getName() + ".dot").str(); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); + + DOTMachineFuncInfo MCFGInfo(&MF); + + if (!EC) + WriteGraph(File, &MCFGInfo, CFGOnly); + else + errs() << " error opening file for writing!"; + errs() << '\n'; +} + +namespace { + +class MachineCFGPrinter : public MachineFunctionPass { +public: + static char ID; + + MachineCFGPrinter(); + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // namespace + +char MachineCFGPrinter::ID = 0; + +char &llvm::MachineCFGPrinterID = MachineCFGPrinter::ID; + +INITIALIZE_PASS(MachineCFGPrinter, DEBUG_TYPE, "Machine CFG Printer Pass", + false, true) + +/// Default construct and initialize the pass. +MachineCFGPrinter::MachineCFGPrinter() : MachineFunctionPass(ID) { + initializeMachineCFGPrinterPass(*PassRegistry::getPassRegistry()); +} + +bool MachineCFGPrinter::runOnMachineFunction(MachineFunction &MF) { + if (!MCFGFuncName.empty() && !MF.getName().contains(MCFGFuncName)) + return false; + errs() << "Writing Machine CFG for function "; + errs().write_escaped(MF.getName()) << '\n'; + + writeMCFGToDotFile(MF); + return false; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp index c6756b1d3737..cd8644029530 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp @@ -60,6 +60,11 @@ STATISTIC(NumCrossBBCSEs, "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); +// Threshold to avoid excessive cost to compute isProfitableToCSE. 
+static cl::opt<int> + CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), + cl::desc("Threshold for the size of CSUses")); + namespace { class MachineCSE : public MachineFunctionPass { @@ -140,7 +145,7 @@ namespace { DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); - bool isPRECandidate(MachineInstr *MI); + bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); /// Heuristics to see if it's profitable to move common computations of MBB @@ -174,14 +179,14 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI->isCopy()) continue; Register SrcReg = DefMI->getOperand(1).getReg(); - if (!Register::isVirtualRegister(SrcReg)) + if (!SrcReg.isVirtual()) continue; if (DefMI->getOperand(0).getSubReg()) continue; @@ -260,8 +265,10 @@ bool MachineCSE::isPhysDefTriviallyDead( } static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, + const MachineOperand &MO, const MachineFunction &MF, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { // MachineRegisterInfo::isConstantPhysReg directly called by // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the // reserved registers to be frozen. That doesn't cause a problem post-ISel as @@ -270,7 +277,7 @@ static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, // It does cause issues mid-GlobalISel, however, hence the additional // reservedRegsFrozen check. const MachineRegisterInfo &MRI = MF.getRegInfo(); - return TRI.isCallerPreservedPhysReg(Reg, MF) || + return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) || (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg)); } @@ -290,10 +297,11 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, Register Reg = MO.getReg(); if (!Reg) continue; - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) continue; // Reading either caller preserved or constant physregs is ok. - if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), *MI->getMF(), *TRI)) + if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI, + *TII)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } @@ -309,7 +317,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, Register Reg = MO.getReg(); if (!Reg) continue; - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) continue; // Check against PhysRefs even if the def is "dead". if (PhysRefs.count(Reg.asMCReg())) @@ -384,7 +392,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; Register MOReg = MO.getReg(); - if (Register::isVirtualRegister(MOReg)) + if (MOReg.isVirtual()) continue; if (PhysRefs.count(MOReg.asMCReg())) return false; @@ -440,18 +448,26 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg, // If CSReg is used at all uses of Reg, CSE should not increase register // pressure of CSReg. 
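The hunk just below introduces CSUsesThreshold as an early bail-out: once the set of CSReg uses grows past the threshold, the pass stops collecting and conservatively assumes register pressure may increase rather than keep scanning. A standalone sketch of that pattern, with plain integers standing in for use instructions (illustrative only, not LLVM API):

#include <unordered_set>
#include <vector>

static bool mayIncreasePressure(const std::vector<int> &CSRegUses,
                                const std::vector<int> &RegUses,
                                unsigned Threshold) {
  std::unordered_set<int> CSUses;
  for (int U : CSRegUses) {
    CSUses.insert(U);
    if (CSUses.size() > Threshold)
      return true; // too costly to be precise, assume the worst and bail out
  }
  // Pressure can only stay flat if every use of Reg is already a use of CSReg.
  for (int U : RegUses)
    if (!CSUses.count(U))
      return true;
  return false;
}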
bool MayIncreasePressure = true; - if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) { + if (CSReg.isVirtual() && Reg.isVirtual()) { MayIncreasePressure = false; SmallPtrSet<MachineInstr*, 8> CSUses; + int NumOfUses = 0; for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { CSUses.insert(&MI); - } - for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { - if (!CSUses.count(&MI)) { + // Too costly to compute if NumOfUses is very large. Conservatively assume + // MayIncreasePressure to avoid spending too much time here. + if (++NumOfUses > CSUsesThreshold) { MayIncreasePressure = true; break; } } + if (!MayIncreasePressure) + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (!CSUses.count(&MI)) { + MayIncreasePressure = true; + break; + } + } } if (!MayIncreasePressure) return true; @@ -468,7 +484,7 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg, // of the redundant computation are copies, do not cse. bool HasVRegUse = false; for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual()) { HasVRegUse = true; break; } @@ -632,8 +648,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { continue; } - assert(Register::isVirtualRegister(OldReg) && - Register::isVirtualRegister(NewReg) && + assert(OldReg.isVirtual() && NewReg.isVirtual() && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) { @@ -785,22 +800,24 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { // We use stronger checks for PRE candidate rather than for CSE ones to embrace // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps // to exclude instrs created by PRE that won't be CSEed later. -bool MachineCSE::isPRECandidate(MachineInstr *MI) { +bool MachineCSE::isPRECandidate(MachineInstr *MI, + SmallSet<MCRegister, 8> &PhysRefs) { if (!isCSECandidate(MI) || MI->isNotDuplicable() || MI->mayLoad() || - MI->isAsCheapAsAMove() || + TII->isAsCheapAsAMove(*MI) || MI->getNumDefs() != 1 || MI->getNumExplicitDefs() != 1) return false; - for (const auto &def : MI->defs()) - if (!Register::isVirtualRegister(def.getReg())) - return false; - - for (const auto &use : MI->uses()) - if (use.isReg() && !Register::isVirtualRegister(use.getReg())) - return false; + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && !MO.getReg().isVirtual()) { + if (MO.isDef()) + return false; + else + PhysRefs.insert(MO.getReg()); + } + } return true; } @@ -809,7 +826,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, MachineBasicBlock *MBB) { bool Changed = false; for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { - if (!isPRECandidate(&MI)) + SmallSet<MCRegister, 8> PhysRefs; + if (!isPRECandidate(&MI, PhysRefs)) continue; if (!PREMap.count(&MI)) { @@ -845,6 +863,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (MI.isConvergent() && CMBB != MBB) continue; + // If this instruction uses physical registers then we can only do PRE + // if it's using the value that is live at the place we're hoisting to. 
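The PhysRegDefsReach check added just below enforces the condition in the comment above: a use of a physical register may only be hoisted if the value it reads at the hoist point is the same one it read originally. A simplified standalone version of that safety test, with small structs standing in for machine instructions (illustrative only):

#include <cstddef>
#include <vector>

struct ToyInst {
  int ReadsPhysReg = -1;  // -1 means no physical register is read
  int WritesPhysReg = -1; // -1 means no physical register is written
};

// Hoisting Insts[From] to just before Insts[To] (with To < From) preserves the
// physreg value it reads only if nothing in between redefines that register.
static bool safeToHoistPhysRegUse(const std::vector<ToyInst> &Insts, size_t To,
                                  size_t From) {
  int Reg = Insts[From].ReadsPhysReg;
  if (Reg < 0)
    return true; // no physical register involved, nothing to check
  for (size_t I = To; I < From; ++I)
    if (Insts[I].WritesPhysReg == Reg)
      return false; // an intervening def would change the value observed
  return true;
}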
+ bool NonLocal; + PhysDefVector PhysDefs; + if (!PhysRefs.empty() && + !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs, + PhysDefs, NonLocal)) + continue; + assert(MI.getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); Register VReg = MI.getOperand(0).getReg(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp index 57e2cd20bdd0..974d570ece51 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -89,7 +90,6 @@ public: StringRef getPassName() const override { return "Machine InstCombiner"; } private: - bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize); bool combineInstructions(MachineBasicBlock *); MachineInstr *getOperandDef(const MachineOperand &MO); bool isTransientMI(const MachineInstr *MI); @@ -151,7 +151,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { MachineInstr *DefInstr = nullptr; // We need a virtual register definition. - if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) + if (MO.isReg() && MO.getReg().isVirtual()) DefInstr = MRI->getUniqueVRegDef(MO.getReg()); // PHI's have no depth etc. if (DefInstr && DefInstr->isPHI()) @@ -209,9 +209,6 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace) { SmallVector<unsigned, 16> InstrDepth; - assert(TSchedModel.hasInstrSchedModelOrItineraries() && - "Missing machine model\n"); - // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth @@ -219,7 +216,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IDepth = 0; for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && MO.getReg().isVirtual())) continue; if (!MO.isUse()) continue; @@ -267,15 +264,12 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, /// \returns Latency of \p NewRoot unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace) { - assert(TSchedModel.hasInstrSchedModelOrItineraries() && - "Missing machine model\n"); - // Check each definition in NewRoot and compute the latency unsigned NewRootLatency = 0; for (const MachineOperand &MO : NewRoot->operands()) { // Check for virtual register operand. 
- if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && MO.getReg().isVirtual())) continue; if (!MO.isDef()) continue; @@ -318,6 +312,10 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: case MachineCombinerPattern::SUBADD_OP1: case MachineCombinerPattern::SUBADD_OP2: + case MachineCombinerPattern::FMADD_AX: + case MachineCombinerPattern::FMADD_XA: + case MachineCombinerPattern::FMSUB: + case MachineCombinerPattern::FNMSUB: return CombinerObjective::MustReduceDepth; case MachineCombinerPattern::REASSOC_XY_BCA: case MachineCombinerPattern::REASSOC_XY_BAC: @@ -375,8 +373,6 @@ bool MachineCombiner::improvesCriticalPathLen( DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineCombinerPattern Pattern, bool SlackIsAccurate) { - assert(TSchedModel.hasInstrSchedModelOrItineraries() && - "Missing machine model\n"); // Get depth and latency of NewRoot and Root. unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; @@ -459,8 +455,8 @@ bool MachineCombiner::preservesResourceLen( instr2instrSC(InsInstrs, InsInstrsSC); instr2instrSC(DelInstrs, DelInstrsSC); - ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC); - ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC); + ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC}; + ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC}; // Compute new resource length. unsigned ResLenAfterCombine = @@ -480,17 +476,6 @@ bool MachineCombiner::preservesResourceLen( ResLenBeforeCombine + TII->getExtendResourceLenLimit(); } -/// \returns true when new instruction sequence should be generated -/// independent if it lengthens critical path or not -bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize, - bool OptForSize) { - if (OptForSize && (NewSize < OldSize)) - return true; - if (!TSchedModel.hasInstrSchedModelOrItineraries()) - return true; - return false; -} - /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction /// depths if requested. /// @@ -636,18 +621,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (VerifyPatternOrder) verifyPatternOrder(MBB, MI, Patterns); - for (auto P : Patterns) { + for (const auto P : Patterns) { SmallVector<MachineInstr *, 16> InsInstrs; SmallVector<MachineInstr *, 16> DelInstrs; DenseMap<unsigned, unsigned> InstrIdxForVirtReg; TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, InstrIdxForVirtReg); - unsigned NewInstCount = InsInstrs.size(); - unsigned OldInstCount = DelInstrs.size(); // Found pattern, but did not generate alternative sequence. // This can happen e.g. when an immediate could not be materialized // in a single instruction. - if (!NewInstCount) + if (InsInstrs.empty()) continue; LLVM_DEBUG(if (dump_intrs) { @@ -662,10 +645,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { /*SkipDebugLoc*/false, /*AddNewLine*/true, TII); }); - bool SubstituteAlways = false; - if (ML && TII->isThroughputPattern(P)) - SubstituteAlways = true; - if (IncrementalUpdate && LastUpdate != BlockIter) { // Update depths since the last incremental update. 
MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits); @@ -693,12 +672,17 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } } - // Substitute when we optimize for codesize and the new sequence has - // fewer instructions OR - // the new sequence neither lengthens the critical path nor increases - // resource pressure. - if (SubstituteAlways || - doSubstitute(NewInstCount, OldInstCount, OptForSize)) { + if (ML && TII->isThroughputPattern(P)) { + LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n"); + insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, + RegUnits, TII, P, IncrementalUpdate); + // Eagerly stop after the first pattern fires. + Changed = true; + break; + } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) { + LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize (" + << InsInstrs.size() << " < " + << DelInstrs.size() << ")\n"); insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, RegUnits, TII, P, IncrementalUpdate); // Eagerly stop after the first pattern fires. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 66f0eb83e57c..871824553aa4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -88,17 +88,17 @@ static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false), namespace { -static Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI, - const TargetInstrInfo &TII, - bool UseCopyInstr) { +static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { if (UseCopyInstr) return TII.isCopyInstr(MI); if (MI.isCopy()) - return Optional<DestSourcePair>( + return std::optional<DestSourcePair>( DestSourcePair{MI.getOperand(0), MI.getOperand(1)}); - return None; + return std::nullopt; } class CopyTracker { @@ -137,7 +137,7 @@ public: auto I = Copies.find(*RUI); if (I != Copies.end()) { if (MachineInstr *MI = I->second.MI) { - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); assert(CopyOperands && "Expect copy"); @@ -166,7 +166,7 @@ public: // When we clobber the destination of a copy, we need to clobber the // whole register it defined. if (MachineInstr *MI = I->second.MI) { - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()}, TRI); @@ -180,7 +180,8 @@ public: /// Add this copy's registers into the tracker's copy maps. 
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII, bool UseCopyInstr) { - Optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); assert(CopyOperands && "Tracking non-copy?"); MCRegister Src = CopyOperands->Source->getReg().asMCReg(); @@ -236,7 +237,7 @@ public: if (!AvailCopy) return nullptr; - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*AvailCopy, TII, UseCopyInstr); Register AvailSrc = CopyOperands->Source->getReg(); Register AvailDef = CopyOperands->Destination->getReg(); @@ -266,7 +267,7 @@ public: if (!AvailCopy) return nullptr; - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*AvailCopy, TII, UseCopyInstr); Register AvailSrc = CopyOperands->Source->getReg(); Register AvailDef = CopyOperands->Destination->getReg(); @@ -383,7 +384,7 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src, MCRegister Def, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, bool UseCopyInstr) { - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(PreviousCopy, *TII, UseCopyInstr); MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg(); MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg(); @@ -422,7 +423,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. - Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(Copy, *TII, UseCopyInstr); assert(CopyOperands); Register CopyDef = CopyOperands->Destination->getReg(); @@ -439,8 +441,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - - Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(Copy, *TII, UseCopyInstr); Register Def = CopyOperands->Destination->getReg(); if (const TargetRegisterClass *URC = @@ -458,8 +460,8 @@ bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - - Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(Copy, *TII, UseCopyInstr); Register CopySrcReg = CopyOperands->Source->getReg(); // If the new register meets the opcode register constraints, then allow @@ -587,7 +589,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!Copy) continue; - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*Copy, *TII, UseCopyInstr); Register CopyDstReg = CopyOperands->Destination->getReg(); const MachineOperand &CopySrc = *CopyOperands->Source; @@ -654,7 +656,8 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // Analyze copies (which don't overlap themselves). 
- Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(MI, *TII, UseCopyInstr); if (CopyOperands) { Register RegSrc = CopyOperands->Source->getReg(); @@ -777,7 +780,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*MaybeDead, *TII, UseCopyInstr); MCRegister Reg = CopyOperands->Destination->getReg().asMCReg(); assert(!MRI->isReserved(Reg)); @@ -816,7 +819,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*MaybeDead, *TII, UseCopyInstr); assert(CopyOperands); @@ -845,7 +848,8 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII, bool UseCopyInstr) { - Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(MI, TII, UseCopyInstr); assert(CopyOperands && "MI is expected to be a COPY"); Register Def = CopyOperands->Destination->getReg(); @@ -887,7 +891,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { if (!Copy) continue; - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*Copy, *TII, UseCopyInstr); Register Def = CopyOperands->Destination->getReg(); Register Src = CopyOperands->Source->getReg(); @@ -925,7 +929,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { // Ignore non-trivial COPYs. 
- Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + std::optional<DestSourcePair> CopyOperands = + isCopyInstr(MI, *TII, UseCopyInstr); if (CopyOperands && MI.getNumOperands() == 2) { Register DefReg = CopyOperands->Destination->getReg(); Register SrcReg = CopyOperands->Source->getReg(); @@ -986,8 +991,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( } for (auto *Copy : MaybeDeadCopies) { - - Optional<DestSourcePair> CopyOperands = + std::optional<DestSourcePair> CopyOperands = isCopyInstr(*Copy, *TII, UseCopyInstr); Register Src = CopyOperands->Source->getReg(); Register Def = CopyOperands->Destination->getReg(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 6871ac35b300..57f7a098ac17 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -9,8 +9,10 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAContext.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -52,6 +54,7 @@ void MachineCycleInfoWrapperPass::releaseMemory() { F = nullptr; } +namespace { class MachineCycleInfoPrinterPass : public MachineFunctionPass { public: static char ID; @@ -61,6 +64,7 @@ public: bool runOnMachineFunction(MachineFunction &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override; }; +} // namespace char MachineCycleInfoPrinterPass::ID = 0; @@ -105,7 +109,7 @@ bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { // An instruction that uses or defines a physical register can't e.g. be // hoisted, so mark this as not invariant. - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp index b726a032ca18..adf1b51a950d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp @@ -153,10 +153,15 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, NMD->setOperand(Idx, MDNode::get(Ctx, ValueAsMetadata::getConstant( ConstantInt::get(Int32Ty, N)))); }; + auto getDebugifyOperand = [&](unsigned Idx) { + return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0)) + ->getZExtValue(); + }; // Set number of lines. setDebugifyOperand(0, NextLine - 1); // Set number of variables. - setDebugifyOperand(1, VarSet.size()); + auto OldNumVars = getDebugifyOperand(1); + setDebugifyOperand(1, OldNumVars + VarSet.size()); } return true; @@ -166,6 +171,9 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, /// legacy module pass manager. struct DebugifyMachineModule : public ModulePass { bool runOnModule(Module &M) override { + // We will insert new debugify metadata, so erasing the old one. + assert(!M.getNamedMetadata("llvm.mir.debugify") && + "llvm.mir.debugify metadata already exists! 
Strip it first"); MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); return applyDebugifyMetadata( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp index f0190812389f..daf6a218165d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -58,7 +58,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment, !IsSpillSlot, StackID)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); - if (StackID == 0) + if (contributesToMaxAlignment(StackID)) ensureMaxAlignment(Alignment); return Index; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 6b481a374382..59e6647fa643 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -187,6 +187,7 @@ void MachineFunction::init() { RegInfo = nullptr; MFInfo = nullptr; + // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && @@ -232,6 +233,12 @@ void MachineFunction::init() { PSVManager = std::make_unique<PseudoSourceValueManager>(getTarget()); } +void MachineFunction::initTargetMachineFunctionInfo( + const TargetSubtargetInfo &STI) { + assert(!MFInfo && "MachineFunctionInfo already set"); + MFInfo = Target.createMachineFunctionInfo(Allocator, F, &STI); +} + MachineFunction::~MachineFunction() { clear(); } @@ -306,7 +313,7 @@ bool MachineFunction::shouldSplitStack() const { return getFunction().hasFnAttribute("split-stack"); } -LLVM_NODISCARD unsigned +[[nodiscard]] unsigned MachineFunction::addFrameInst(const MCCFIInstruction &Inst) { FrameInstructions.push_back(Inst); return FrameInstructions.size() - 1; @@ -437,8 +444,16 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) { /// `new MachineBasicBlock'. MachineBasicBlock * MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { - return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator)) - MachineBasicBlock(*this, bb); + MachineBasicBlock *MBB = + new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator)) + MachineBasicBlock(*this, bb); + // Set BBID for `-basic-block=sections=labels` and + // `-basic-block-sections=list` to allow robust mapping of profiles to basic + // blocks. + if (Target.getBBSectionsType() == BasicBlockSection::Labels || + Target.getBBSectionsType() == BasicBlockSection::List) + MBB->setBBID(NextBBID++); + return MBB; } /// Delete the given MachineBasicBlock. 
@@ -530,9 +545,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo( ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, - MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker) { + MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections, + uint32_t CFIType) { return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol, - PostInstrSymbol, HeapAllocMarker); + PostInstrSymbol, HeapAllocMarker, + PCSections, CFIType); } const char *MachineFunction::createExternalSymbolName(StringRef Name) { @@ -750,12 +767,10 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI(); if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) { - if (const auto *PF = - dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts())) - getMMI().addPersonality(PF); - - if (LPI->isCleanup()) - addCleanup(LandingPad); + // If there's no typeid list specified, then "cleanup" is implicit. + // Otherwise, id 0 is reserved for the cleanup action. + if (LPI->isCleanup() && LPI->getNumClauses() != 0) + LP.TypeIds.push_back(0); // FIXME: New EH - Add the clauses in reverse order. This isn't 100% // correct, but we need to do it this way because of how the DWARF EH @@ -763,23 +778,25 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { for (unsigned I = LPI->getNumClauses(); I != 0; --I) { Value *Val = LPI->getClause(I - 1); if (LPI->isCatch(I - 1)) { - addCatchTypeInfo(LandingPad, - dyn_cast<GlobalValue>(Val->stripPointerCasts())); + LP.TypeIds.push_back( + getTypeIDFor(dyn_cast<GlobalValue>(Val->stripPointerCasts()))); } else { // Add filters in a list. auto *CVal = cast<Constant>(Val); - SmallVector<const GlobalValue *, 4> FilterList; + SmallVector<unsigned, 4> FilterList; for (const Use &U : CVal->operands()) - FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts())); + FilterList.push_back( + getTypeIDFor(cast<GlobalValue>(U->stripPointerCasts()))); - addFilterTypeInfo(LandingPad, FilterList); + LP.TypeIds.push_back(getFilterIDFor(FilterList)); } } } else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) { - for (unsigned I = CPI->getNumArgOperands(); I != 0; --I) { - Value *TypeInfo = CPI->getArgOperand(I - 1)->stripPointerCasts(); - addCatchTypeInfo(LandingPad, dyn_cast<GlobalValue>(TypeInfo)); + for (unsigned I = CPI->arg_size(); I != 0; --I) { + auto *TypeInfo = + dyn_cast<GlobalValue>(CPI->getArgOperand(I - 1)->stripPointerCasts()); + LP.TypeIds.push_back(getTypeIDFor(TypeInfo)); } } else { @@ -789,73 +806,6 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { return LandingPadLabel; } -void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - for (const GlobalValue *GV : llvm::reverse(TyInfo)) - LP.TypeIds.push_back(getTypeIDFor(GV)); -} - -void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - std::vector<unsigned> IdsInFilter(TyInfo.size()); - for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) - IdsInFilter[I] = getTypeIDFor(TyInfo[I]); - LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); -} - -void MachineFunction::tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap, - bool TidyIfNoBeginLabels) { - for (unsigned 
i = 0; i != LandingPads.size(); ) { - LandingPadInfo &LandingPad = LandingPads[i]; - if (LandingPad.LandingPadLabel && - !LandingPad.LandingPadLabel->isDefined() && - (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) - LandingPad.LandingPadLabel = nullptr; - - // Special case: we *should* emit LPs with null LP MBB. This indicates - // "nounwind" case. - if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { - LandingPads.erase(LandingPads.begin() + i); - continue; - } - - if (TidyIfNoBeginLabels) { - for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { - MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; - MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if ((BeginLabel->isDefined() || (LPMap && (*LPMap)[BeginLabel] != 0)) && - (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0))) - continue; - - LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); - LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); - --j; - --e; - } - - // Remove landing pads with no try-ranges. - if (LandingPads[i].BeginLabels.empty()) { - LandingPads.erase(LandingPads.begin() + i); - continue; - } - } - - // If there is no landing pad, ensure that the list of typeids is empty. - // If the only typeid is a cleanup, this is the same as having no typeids. - if (!LandingPad.LandingPadBlock || - (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) - LandingPad.TypeIds.clear(); - ++i; - } -} - -void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.TypeIds.push_back(0); -} - void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites) { LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); @@ -869,7 +819,7 @@ unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) { return TypeInfos.size(); } -int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) { +int MachineFunction::getFilterIDFor(ArrayRef<unsigned> TyIds) { // If the new filter coincides with the tail of an existing filter, then // re-use the existing filter. Folding filters more than this requires // re-ordering filters and/or their elements - probably not worth it. @@ -1187,58 +1137,65 @@ void MachineFunction::finalizeDebugInstrRefs() { auto *TII = getSubtarget().getInstrInfo(); auto MakeUndefDbgValue = [&](MachineInstr &MI) { - const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE_LIST); MI.setDesc(RefII); - MI.getOperand(0).setReg(0); - MI.getOperand(1).ChangeToRegister(0, false); + MI.setDebugValueUndef(); }; DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs; for (auto &MBB : *this) { for (auto &MI : MBB) { - if (!MI.isDebugRef() || !MI.getOperand(0).isReg()) + if (!MI.isDebugRef()) continue; - Register Reg = MI.getOperand(0).getReg(); + bool IsValidRef = true; - // Some vregs can be deleted as redundant in the meantime. Mark those - // as DBG_VALUE $noreg. Additionally, some normal instructions are - // quickly deleted, leaving dangling references to vregs with no def. 
- if (Reg == 0 || !RegInfo->hasOneDef(Reg)) { - MakeUndefDbgValue(MI); - continue; - } + for (MachineOperand &MO : MI.debug_operands()) { + if (!MO.isReg()) + continue; - assert(Reg.isVirtual()); - MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg); + Register Reg = MO.getReg(); - // If we've found a copy-like instruction, follow it back to the - // instruction that defines the source value, see salvageCopySSA docs - // for why this is important. - if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) { - auto Result = salvageCopySSA(DefMI, ArgDbgPHIs); - MI.getOperand(0).ChangeToImmediate(Result.first); - MI.getOperand(1).setImm(Result.second); - } else { - // Otherwise, identify the operand number that the VReg refers to. - unsigned OperandIdx = 0; - for (const auto &MO : DefMI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) - break; - ++OperandIdx; + // Some vregs can be deleted as redundant in the meantime. Mark those + // as DBG_VALUE $noreg. Additionally, some normal instructions are + // quickly deleted, leaving dangling references to vregs with no def. + if (Reg == 0 || !RegInfo->hasOneDef(Reg)) { + IsValidRef = false; + break; } - assert(OperandIdx < DefMI.getNumOperands()); - // Morph this instr ref to point at the given instruction and operand. - unsigned ID = DefMI.getDebugInstrNum(); - MI.getOperand(0).ChangeToImmediate(ID); - MI.getOperand(1).setImm(OperandIdx); + assert(Reg.isVirtual()); + MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg); + + // If we've found a copy-like instruction, follow it back to the + // instruction that defines the source value, see salvageCopySSA docs + // for why this is important. + if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) { + auto Result = salvageCopySSA(DefMI, ArgDbgPHIs); + MO.ChangeToDbgInstrRef(Result.first, Result.second); + } else { + // Otherwise, identify the operand number that the VReg refers to. + unsigned OperandIdx = 0; + for (const auto &DefMO : DefMI.operands()) { + if (DefMO.isReg() && DefMO.isDef() && DefMO.getReg() == Reg) + break; + ++OperandIdx; + } + assert(OperandIdx < DefMI.getNumOperands()); + + // Morph this instr ref to point at the given instruction and operand. + unsigned ID = DefMI.getDebugInstrNum(); + MO.ChangeToDbgInstrRef(ID, OperandIdx); + } } + + if (!IsValidRef) + MakeUndefDbgValue(MI); } } } -bool MachineFunction::useDebugInstrRef() const { +bool MachineFunction::shouldUseDebugInstrRef() const { // Disable instr-ref at -O0: it's very slow (in compile time). We can still // have optimized code inlined into this unoptimized code, however with // fewer and less aggressive optimizations happening, coverage and accuracy @@ -1256,6 +1213,14 @@ bool MachineFunction::useDebugInstrRef() const { return false; } +bool MachineFunction::useDebugInstrRef() const { + return UseDebugInstrRef; +} + +void MachineFunction::setUseDebugInstrRef(bool Use) { + UseDebugInstrRef = Use; +} + // Use one million as a high / reserved number. const unsigned MachineFunction::DebugOperandMemNumber = 1000000; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp index 477310f59112..3a1e1720be9c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -73,10 +73,16 @@ bool MachineFunctionPass::runOnFunction(Function &F) { // For --print-changed, if the function name is a candidate, save the // serialized MF to be compared later. 
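The rewrite below replaces the --filter-passes TODO with a working filter: snapshot the function only when both the pass and the function pass the user's print filters, run the pass, then print or diff only if the serialized form actually changed. A compact sketch of that control flow with invented helper names (the real code uses isPassInPrintList, isFunctionInPrintList and MF.print):

#include <functional>
#include <iostream>
#include <string>

static void runWithPrintChanged(
    const std::string &PassID, const std::string &FnName,
    const std::function<std::string()> &Snapshot,
    const std::function<void()> &RunPass,
    const std::function<bool(const std::string &)> &PassFilter,
    const std::function<bool(const std::string &)> &FunctionFilter) {
  const bool Interesting = PassFilter(PassID) && FunctionFilter(FnName);
  const std::string Before = Interesting ? Snapshot() : std::string();
  RunPass();
  if (!Interesting)
    return; // filtered out, nothing to print
  const std::string After = Snapshot();
  if (Before != After)
    std::cout << "*** IR Dump After " << PassID << " on " << FnName << " ***\n"
              << After;
}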
- // TODO Implement --filter-passes. SmallString<0> BeforeStr, AfterStr; - bool ShouldPrintChanged = PrintChanged != ChangePrinter::None && - isFunctionInPrintList(MF.getName()); + StringRef PassID; + if (PrintChanged != ChangePrinter::None) { + if (const PassInfo *PI = Pass::lookupPassInfo(getPassID())) + PassID = PI->getPassArgument(); + } + const bool IsInterestingPass = isPassInPrintList(PassID); + const bool ShouldPrintChanged = PrintChanged != ChangePrinter::None && + IsInterestingPass && + isFunctionInPrintList(MF.getName()); if (ShouldPrintChanged) { raw_svector_ostream OS(BeforeStr); MF.print(OS); @@ -112,18 +118,47 @@ bool MachineFunctionPass::runOnFunction(Function &F) { // For --print-changed, print if the serialized MF has changed. Modes other // than quiet/verbose are unimplemented and treated the same as 'quiet'. - if (ShouldPrintChanged) { - raw_svector_ostream OS(AfterStr); - MF.print(OS); - if (BeforeStr != AfterStr) { - StringRef Arg; - if (const PassInfo *PI = Pass::lookupPassInfo(getPassID())) - Arg = PI->getPassArgument(); - errs() << ("*** IR Dump After " + getPassName() + " (" + Arg + ") on " + - MF.getName() + " ***\n" + AfterStr); - } else if (PrintChanged == ChangePrinter::Verbose) { - errs() << ("*** IR Dump After " + getPassName() + " on " + MF.getName() + - " omitted because no change ***\n"); + if (ShouldPrintChanged || !IsInterestingPass) { + if (ShouldPrintChanged) { + raw_svector_ostream OS(AfterStr); + MF.print(OS); + } + if (IsInterestingPass && BeforeStr != AfterStr) { + errs() << ("*** IR Dump After " + getPassName() + " (" + PassID + + ") on " + MF.getName() + " ***\n"); + switch (PrintChanged) { + case ChangePrinter::None: + llvm_unreachable(""); + case ChangePrinter::Quiet: + case ChangePrinter::Verbose: + case ChangePrinter::DotCfgQuiet: // unimplemented + case ChangePrinter::DotCfgVerbose: // unimplemented + errs() << AfterStr; + break; + case ChangePrinter::DiffQuiet: + case ChangePrinter::DiffVerbose: + case ChangePrinter::ColourDiffQuiet: + case ChangePrinter::ColourDiffVerbose: { + bool Color = llvm::is_contained( + {ChangePrinter::ColourDiffQuiet, ChangePrinter::ColourDiffVerbose}, + PrintChanged.getValue()); + StringRef Removed = Color ? "\033[31m-%l\033[0m\n" : "-%l\n"; + StringRef Added = Color ? "\033[32m+%l\033[0m\n" : "+%l\n"; + StringRef NoChange = " %l\n"; + errs() << doSystemDiff(BeforeStr, AfterStr, Removed, Added, NoChange); + break; + } + } + } else if (llvm::is_contained({ChangePrinter::Verbose, + ChangePrinter::DiffVerbose, + ChangePrinter::ColourDiffVerbose}, + PrintChanged.getValue())) { + const char *Reason = + IsInterestingPass ? 
" omitted because no change" : " filtered out"; + errs() << "*** IR Dump After " << getPassName(); + if (!PassID.empty()) + errs() << " (" << PassID << ")"; + errs() << " on " << MF.getName() + Reason + " ***\n"; } } return RV; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 3e1aace855a5..613c52900331 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" +#include <optional> using namespace llvm; @@ -57,6 +58,11 @@ static cl::opt<unsigned> ColdCountThreshold( "Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden); +static cl::opt<bool> SplitAllEHCode( + "mfs-split-ehcode", + cl::desc("Splits all EH code and it's descendants by default."), + cl::init(false), cl::Hidden); + namespace { class MachineFunctionSplitter : public MachineFunctionPass { @@ -76,10 +82,83 @@ public: }; } // end anonymous namespace +/// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable +/// only by EH pad as cold. This will help mark EH pads statically cold instead +/// of relying on profile data. +static void +setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks, + MachineFunction &MF) { + MachineBasicBlock *StartBlock = &MF.front(); + // A block can be unknown if its not reachable from anywhere + // EH if its only reachable from start blocks via some path through EH pads + // NonEH if it's reachable from Non EH blocks as well. + enum Status { Unknown = 0, EH = 1, NonEH = 2 }; + DenseSet<MachineBasicBlock *> WorkList; + DenseMap<MachineBasicBlock *, Status> Statuses; + + auto getStatus = [&](MachineBasicBlock *MBB) { + if (Statuses.find(MBB) != Statuses.end()) + return Statuses[MBB]; + else + return Unknown; + }; + + auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) { + for (auto *PredMBB : MBB->predecessors()) { + Status PredStatus = getStatus(PredMBB); + // If status of predecessor block has gone above current block + // we update current blocks status. + if (PredStatus > Stat) + Stat = PredStatus; + } + return Stat; + }; + + auto addSuccesors = [&](MachineBasicBlock *MBB) { + for (auto *SuccMBB : MBB->successors()) { + if (!SuccMBB->isEHPad()) + WorkList.insert(SuccMBB); + } + }; + + // Insert the successors of start block + // and landing pads successor. + Statuses[StartBlock] = NonEH; + addSuccesors(StartBlock); + for (auto *LP : EHBlocks) { + addSuccesors(LP); + Statuses[LP] = EH; + } + + // Worklist iterative algorithm. + while (!WorkList.empty()) { + auto *MBB = *WorkList.begin(); + WorkList.erase(MBB); + + Status OldStatus = getStatus(MBB); + + // Check on predecessors and check for + // Status update. + Status NewStatus = checkPredecessors(MBB, OldStatus); + + // Did the block status change? 
+ bool changed = OldStatus != NewStatus; + if (changed) { + addSuccesors(MBB); + Statuses[MBB] = NewStatus; + } + } + + for (auto Entry : Statuses) { + if (Entry.second == EH) + Entry.first->setSectionID(MBBSectionID::ColdSectionID); + } +} + static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI) { - Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); + std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); if (!Count) return true; @@ -90,9 +169,11 @@ static bool isColdBlock(const MachineBasicBlock &MBB, } bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { - // TODO: We only target functions with profile data. Static information may - // also be considered but we don't see performance improvements yet. - if (!MF.getFunction().hasProfileData()) + // We target functions with profile data. Static information in the form + // of exception handling code may be split out as cold if the user passes + // the mfs-split-ehcode flag. + bool UseProfileData = MF.getFunction().hasProfileData(); + if (!UseProfileData && !SplitAllEHCode) return false; // TODO: We don't split functions where a section attribute has been set @@ -105,9 +186,9 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // We don't want to proceed further for cold functions // or functions of unknown hotness. Lukewarm functions have no prefix. - Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); - if (SectionPrefix && (SectionPrefix.value().equals("unlikely") || - SectionPrefix.value().equals("unknown"))) { + std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); + if (SectionPrefix && + (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) { return false; } @@ -117,8 +198,13 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // made by prior passes such as MachineBlockPlacement. MF.RenumberBlocks(); MF.setBBSectionsType(BasicBlockSection::Preset); - auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + + MachineBlockFrequencyInfo *MBFI = nullptr; + ProfileSummaryInfo *PSI = nullptr; + if (UseProfileData) { + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + } SmallVector<MachineBasicBlock *, 2> LandingPads; for (auto &MBB : MF) { @@ -127,21 +213,25 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { if (MBB.isEHPad()) LandingPads.push_back(&MBB); - else if (isColdBlock(MBB, MBFI, PSI)) + else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode) MBB.setSectionID(MBBSectionID::ColdSectionID); } + // Split all EH code and its descendants statically by default. + if (SplitAllEHCode) + setDescendantEHBlocksCold(LandingPads, MF); // We only split out eh pads if all of them are cold.
- bool HasHotLandingPads = false; - for (const MachineBasicBlock *LP : LandingPads) { - if (!isColdBlock(*LP, MBFI, PSI)) - HasHotLandingPads = true; + else { + bool HasHotLandingPads = false; + for (const MachineBasicBlock *LP : LandingPads) { + if (!isColdBlock(*LP, MBFI, PSI)) + HasHotLandingPads = true; + } + if (!HasHotLandingPads) { + for (MachineBasicBlock *LP : LandingPads) + LP->setSectionID(MBBSectionID::ColdSectionID); + } } - if (!HasHotLandingPads) { - for (MachineBasicBlock *LP : LandingPads) - LP->setSectionID(MBBSectionID::ColdSectionID); - } - auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { return X.getSectionID().Type < Y.getSectionID().Type; }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index e92dec5bea48..8e0777f8438a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" @@ -85,14 +84,10 @@ static void tryToGetTargetInfo(const MachineInstr &MI, } void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { - if (MCID->ImplicitDefs) - for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; - ++ImpDefs) - addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); - if (MCID->ImplicitUses) - for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; - ++ImpUses) - addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); + for (MCPhysReg ImpDef : MCID->implicit_defs()) + addOperand(MF, MachineOperand::CreateReg(ImpDef, true, true)); + for (MCPhysReg ImpUse : MCID->implicit_uses()) + addOperand(MF, MachineOperand::CreateReg(ImpUse, false, true)); } /// MachineInstr ctor - This constructor creates a MachineInstr and adds the @@ -104,8 +99,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID, assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. - if (unsigned NumOps = MCID->getNumOperands() + - MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { + if (unsigned NumOps = MCID->getNumOperands() + MCID->implicit_defs().size() + + MCID->implicit_uses().size()) { CapOperands = OperandCapacity::get(NumOps); Operands = MF.allocateOperandArray(CapOperands); } @@ -129,6 +124,14 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) for (const MachineOperand &MO : MI.operands()) addOperand(MF, MO); + // Replicate ties between the operands, which addOperand was not + // able to do reliably. + for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { + MachineOperand &NewMO = getOperand(i); + const MachineOperand &OrigMO = MI.getOperand(i); + NewMO.TiedTo = OrigMO.TiedTo; + } + // Copy all the sensible flags. 
setFlags(MI.Flags); } @@ -301,12 +304,15 @@ void MachineInstr::setExtraInfo(MachineFunction &MF, ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol, - MDNode *HeapAllocMarker) { + MDNode *HeapAllocMarker, MDNode *PCSections, + uint32_t CFIType) { bool HasPreInstrSymbol = PreInstrSymbol != nullptr; bool HasPostInstrSymbol = PostInstrSymbol != nullptr; bool HasHeapAllocMarker = HeapAllocMarker != nullptr; - int NumPointers = - MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + HasHeapAllocMarker; + bool HasPCSections = PCSections != nullptr; + bool HasCFIType = CFIType != 0; + int NumPointers = MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + + HasHeapAllocMarker + HasPCSections + HasCFIType; // Drop all extra info if there is none. if (NumPointers <= 0) { @@ -318,9 +324,11 @@ void MachineInstr::setExtraInfo(MachineFunction &MF, // out of line because PointerSumType cannot hold more than 4 tag types with // 32-bit pointers. // FIXME: Maybe we should make the symbols in the extra info mutable? - else if (NumPointers > 1 || HasHeapAllocMarker) { - Info.set<EIIK_OutOfLine>(MF.createMIExtraInfo( - MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker)); + else if (NumPointers > 1 || HasHeapAllocMarker || HasPCSections || + HasCFIType) { + Info.set<EIIK_OutOfLine>( + MF.createMIExtraInfo(MMOs, PreInstrSymbol, PostInstrSymbol, + HeapAllocMarker, PCSections, CFIType)); return; } @@ -338,7 +346,7 @@ void MachineInstr::dropMemRefs(MachineFunction &MF) { return; setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(), - getHeapAllocMarker()); + getHeapAllocMarker(), getPCSections(), getCFIType()); } void MachineInstr::setMemRefs(MachineFunction &MF, @@ -349,7 +357,7 @@ void MachineInstr::setMemRefs(MachineFunction &MF, } setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(), - getHeapAllocMarker()); + getHeapAllocMarker(), getPCSections(), getCFIType()); } void MachineInstr::addMemOperand(MachineFunction &MF, @@ -372,7 +380,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) { // are the same (including null). if (getPreInstrSymbol() == MI.getPreInstrSymbol() && getPostInstrSymbol() == MI.getPostInstrSymbol() && - getHeapAllocMarker() == MI.getHeapAllocMarker()) { + getHeapAllocMarker() == MI.getHeapAllocMarker() && + getPCSections() == MI.getPCSections()) { Info = MI.Info; return; } @@ -457,7 +466,7 @@ void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { } setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(), - getHeapAllocMarker()); + getHeapAllocMarker(), getPCSections(), getCFIType()); } void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { @@ -472,7 +481,7 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { } setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol, - getHeapAllocMarker()); + getHeapAllocMarker(), getPCSections(), getCFIType()); } void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) { @@ -481,7 +490,25 @@ void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) { return; setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), - Marker); + Marker, getPCSections(), getCFIType()); +} + +void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) { + // Do nothing if old and new symbols are the same. 
+ if (PCSections == getPCSections()) + return; + + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker(), PCSections, getCFIType()); +} + +void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) { + // Do nothing if old and new types are the same. + if (Type == getCFIType()) + return; + + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker(), getPCSections(), Type); } void MachineInstr::cloneInstrSymbols(MachineFunction &MF, @@ -496,6 +523,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF, setPreInstrSymbol(MF, MI.getPreInstrSymbol()); setPostInstrSymbol(MF, MI.getPostInstrSymbol()); setHeapAllocMarker(MF, MI.getHeapAllocMarker()); + setPCSections(MF, MI.getPCSections()); } uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { @@ -608,8 +636,7 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (Check == IgnoreDefs) continue; else if (Check == IgnoreVRegDefs) { - if (!Register::isVirtualRegister(MO.getReg()) || - !Register::isVirtualRegister(OMO.getReg())) + if (!MO.getReg().isVirtual() || !OMO.getReg().isVirtual()) if (!MO.isIdenticalTo(OMO)) return false; } else { @@ -630,6 +657,34 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (getDebugLoc() && Other.getDebugLoc() && getDebugLoc() != Other.getDebugLoc()) return false; + // If pre- or post-instruction symbols do not match then the two instructions + // are not identical. + if (getPreInstrSymbol() != Other.getPreInstrSymbol() || + getPostInstrSymbol() != Other.getPostInstrSymbol()) + return false; + // Call instructions with different CFI types are not identical. + if (isCall() && getCFIType() != Other.getCFIType()) + return false; + + return true; +} + +bool MachineInstr::isEquivalentDbgInstr(const MachineInstr &Other) const { + if (!isDebugValueLike() || !Other.isDebugValueLike()) + return false; + if (getDebugLoc() != Other.getDebugLoc()) + return false; + if (getDebugVariable() != Other.getDebugVariable()) + return false; + if (getNumDebugOperands() != Other.getNumDebugOperands()) + return false; + for (unsigned OpIdx = 0; OpIdx < getNumDebugOperands(); ++OpIdx) + if (!getDebugOperand(OpIdx).isIdenticalTo(Other.getDebugOperand(OpIdx))) + return false; + if (!DIExpression::isEqualExpression( + getDebugExpression(), isIndirectDebugValue(), + Other.getDebugExpression(), Other.isIndirectDebugValue())) + return false; return true; } @@ -794,14 +849,14 @@ const DILabel *MachineInstr::getDebugLabel() const { } const MachineOperand &MachineInstr::getDebugVariableOp() const { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); - unsigned VariableOp = isDebugValueList() ? 0 : 2; + assert((isDebugValueLike()) && "not a DBG_VALUE*"); + unsigned VariableOp = isNonListDebugValue() ? 2 : 0; return getOperand(VariableOp); } MachineOperand &MachineInstr::getDebugVariableOp() { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); - unsigned VariableOp = isDebugValueList() ? 0 : 2; + assert((isDebugValueLike()) && "not a DBG_VALUE*"); + unsigned VariableOp = isNonListDebugValue() ? 2 : 0; return getOperand(VariableOp); } @@ -810,14 +865,14 @@ const DILocalVariable *MachineInstr::getDebugVariable() const { } const MachineOperand &MachineInstr::getDebugExpressionOp() const { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); - unsigned ExpressionOp = isDebugValueList() ? 
1 : 3; + assert((isDebugValueLike()) && "not a DBG_VALUE*"); + unsigned ExpressionOp = isNonListDebugValue() ? 3 : 1; return getOperand(ExpressionOp); } MachineOperand &MachineInstr::getDebugExpressionOp() { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); - unsigned ExpressionOp = isDebugValueList() ? 1 : 3; + assert((isDebugValueLike()) && "not a DBG_VALUE*"); + unsigned ExpressionOp = isNonListDebugValue() ? 3 : 1; return getOperand(ExpressionOp); } @@ -993,7 +1048,7 @@ MachineInstr::readsWritesVirtualRegister(Register Reg, int MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, const TargetRegisterInfo *TRI) const { - bool isPhys = Register::isPhysicalRegister(Reg); + bool isPhys = Reg.isPhysical(); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); // Accept regmask operands when Overlap is set. @@ -1004,7 +1059,7 @@ MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, continue; Register MOReg = MO.getReg(); bool Found = (MOReg == Reg); - if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) { + if (!Found && TRI && isPhys && MOReg.isPhysical()) { if (Overlap) Found = TRI->regsOverlap(MOReg, Reg); else @@ -1027,7 +1082,7 @@ int MachineInstr::findFirstPredOperandIdx() const { const MCInstrDesc &MCID = getDesc(); if (MCID.isPredicable()) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (MCID.OpInfo[i].isPredicate()) + if (MCID.operands()[i].isPredicate()) return i; } @@ -1162,7 +1217,7 @@ void MachineInstr::clearKillInfo() { void MachineInstr::substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo) { - if (Register::isPhysicalRegister(ToReg)) { + if (ToReg.isPhysical()) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); for (MachineOperand &MO : operands()) { @@ -1465,7 +1520,7 @@ LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, if (isVariadic() || OpIdx >= getNumExplicitOperands()) return MRI.getType(Op.getReg()); - auto &OpInfo = getDesc().OpInfo[OpIdx]; + auto &OpInfo = getDesc().operands()[OpIdx]; if (!OpInfo.isGenericType()) return MRI.getType(Op.getReg()); @@ -1748,6 +1803,19 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << " heap-alloc-marker "; HeapAllocMarker->printAsOperand(OS, MST); } + if (MDNode *PCSections = getPCSections()) { + if (!FirstOp) { + FirstOp = false; + OS << ','; + } + OS << " pcsections "; + PCSections->printAsOperand(OS, MST); + } + if (uint32_t CFIType = getCFIType()) { + if (!FirstOp) + OS << ','; + OS << " cfi-type " << CFIType; + } if (DebugInstrNum) { if (!FirstOp) @@ -1822,7 +1890,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, bool MachineInstr::addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = Register::isPhysicalRegister(IncomingReg); + bool isPhysReg = IncomingReg.isPhysical(); bool hasAliases = isPhysReg && MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; @@ -1853,7 +1921,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg, MO.setIsKill(); Found = true; } - } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) { + } else if (hasAliases && MO.isKill() && Reg.isPhysical()) { // A super-register kill already exists. 
if (RegInfo->isSuperRegister(IncomingReg, Reg)) return true; @@ -1887,7 +1955,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg, void MachineInstr::clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo) { - if (!Register::isPhysicalRegister(Reg)) + if (!Reg.isPhysical()) RegInfo = nullptr; for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) @@ -1901,7 +1969,7 @@ void MachineInstr::clearRegisterKills(Register Reg, bool MachineInstr::addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = Register::isPhysicalRegister(Reg); + bool isPhysReg = Reg.isPhysical(); bool hasAliases = isPhysReg && MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; @@ -1917,8 +1985,7 @@ bool MachineInstr::addRegisterDead(Register Reg, if (MOReg == Reg) { MO.setIsDead(); Found = true; - } else if (hasAliases && MO.isDead() && - Register::isPhysicalRegister(MOReg)) { + } else if (hasAliases && MO.isDead() && MOReg.isPhysical()) { // There exists a super-register that's marked dead. if (RegInfo->isSuperRegister(Reg, MOReg)) return true; @@ -1969,7 +2036,7 @@ void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { void MachineInstr::addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo) { - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo); if (MO) return; @@ -2017,7 +2084,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) + if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual()) continue; // Skip virtual register defs. 
HashComponents.push_back(hash_value(MO)); @@ -2065,41 +2132,35 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - const MachineOperand &MO, - const MDNode *Variable, const MDNode *Expr) { - assert(isa<DILocalVariable>(Variable) && "not a variable"); - assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - if (MO.isReg()) - return BuildMI(MF, DL, MCID, IsIndirect, MO.getReg(), Variable, Expr); - - auto MIB = BuildMI(MF, DL, MCID).add(MO); - if (IsIndirect) - MIB.addImm(0U); - else - MIB.addReg(0U); - return MIB.addMetadata(Variable).addMetadata(Expr); -} - -MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, - const MCInstrDesc &MCID, bool IsIndirect, - ArrayRef<MachineOperand> MOs, + ArrayRef<MachineOperand> DebugOps, const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - if (MCID.Opcode == TargetOpcode::DBG_VALUE) - return BuildMI(MF, DL, MCID, IsIndirect, MOs[0], Variable, Expr); + if (MCID.Opcode == TargetOpcode::DBG_VALUE) { + assert(DebugOps.size() == 1 && + "DBG_VALUE must contain exactly one debug operand"); + MachineOperand DebugOp = DebugOps[0]; + if (DebugOp.isReg()) + return BuildMI(MF, DL, MCID, IsIndirect, DebugOp.getReg(), Variable, + Expr); + + auto MIB = BuildMI(MF, DL, MCID).add(DebugOp); + if (IsIndirect) + MIB.addImm(0U); + else + MIB.addReg(0U); + return MIB.addMetadata(Variable).addMetadata(Expr); + } auto MIB = BuildMI(MF, DL, MCID); MIB.addMetadata(Variable).addMetadata(Expr); - for (const MachineOperand &MO : MOs) - if (MO.isReg()) - MIB.addReg(MO.getReg()); + for (const MachineOperand &DebugOp : DebugOps) + if (DebugOp.isReg()) + MIB.addReg(DebugOp.getReg()); else - MIB.add(MO); + MIB.add(DebugOp); return MIB; } @@ -2117,21 +2178,12 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - bool IsIndirect, MachineOperand &MO, - const MDNode *Variable, const MDNode *Expr) { - MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MO, Variable, Expr); - BB.insert(I, MI); - return MachineInstrBuilder(MF, *MI); -} - -MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, - MachineBasicBlock::iterator I, - const DebugLoc &DL, const MCInstrDesc &MCID, - bool IsIndirect, ArrayRef<MachineOperand> MOs, + bool IsIndirect, + ArrayRef<MachineOperand> DebugOps, const MDNode *Variable, const MDNode *Expr) { MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MOs, Variable, Expr); + MachineInstr *MI = + BuildMI(MF, DL, MCID, IsIndirect, DebugOps, Variable, Expr); BB.insert(I, MI); return MachineInstrBuilder(MF, *MI); } @@ -2173,6 +2225,8 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const MachineInstr &Orig, int FrameIndex, Register SpillReg) { + assert(!Orig.isDebugRef() && + "DBG_INSTR_REF should not reference a virtual register."); const DIExpression *Expr = 
computeExprForSpill(Orig, SpillReg); MachineInstrBuilder NewMI = BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc()); @@ -2275,7 +2329,7 @@ static unsigned getSpillSlotSize(const MMOList &Accesses, return Size; } -Optional<unsigned> +std::optional<unsigned> MachineInstr::getSpillSize(const TargetInstrInfo *TII) const { int FI; if (TII->isStoreToStackSlotPostFE(*this, FI)) { @@ -2283,18 +2337,18 @@ MachineInstr::getSpillSize(const TargetInstrInfo *TII) const { if (MFI.isSpillSlotObjectIndex(FI)) return (*memoperands_begin())->getSize(); } - return None; + return std::nullopt; } -Optional<unsigned> +std::optional<unsigned> MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const { MMOList Accesses; if (TII->hasStoreToStackSlot(*this, Accesses)) return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); - return None; + return std::nullopt; } -Optional<unsigned> +std::optional<unsigned> MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const { int FI; if (TII->isLoadFromStackSlotPostFE(*this, FI)) { @@ -2302,15 +2356,15 @@ MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const { if (MFI.isSpillSlotObjectIndex(FI)) return (*memoperands_begin())->getSize(); } - return None; + return std::nullopt; } -Optional<unsigned> +std::optional<unsigned> MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const { MMOList Accesses; if (TII->hasLoadFromStackSlot(*this, Accesses)) return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); - return None; + return std::nullopt; } unsigned MachineInstr::getDebugInstrNum() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp index 2f1d7b976264..0c059a145ca4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -198,7 +198,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, DeadDefSet.erase(Reg); } - if (!MO.isDead() && Register::isPhysicalRegister(Reg)) { + if (!MO.isDead() && Reg.isPhysical()) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; if (LocalDefSet.insert(SubReg).second) @@ -328,7 +328,7 @@ PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, continue; Register MOReg = MO.getReg(); - if (!MOReg || !Register::isPhysicalRegister(MOReg)) + if (!MOReg || !MOReg.isPhysical()) continue; if (!TRI->regsOverlap(MOReg, Reg)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp index df7b6c782b91..1c09c01df3aa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp @@ -452,8 +452,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, Register Reg = MO.getReg(); if (!Reg) continue; - assert(Register::isPhysicalRegister(Reg) && - "Not expecting virtual register!"); + assert(Reg.isPhysical() && "Not expecting virtual register!"); if (!MO.isDef()) { if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) @@ -844,7 +843,7 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, if (!MO.isReg() || MO.isImplicit()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; // FIXME: It seems bad to use RegSeen only for some of these calculations. 
@@ -916,9 +915,9 @@ static bool isInvariantStore(const MachineInstr &MI, Register Reg = MO.getReg(); // If operand is a virtual register, check if it comes from a copy of a // physical register. - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) return false; if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF())) return false; @@ -947,7 +946,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineFunction *MF = MI.getMF(); // Check that we are copying a constant physical register. Register CopySrcReg = MI.getOperand(1).getReg(); - if (Register::isVirtualRegister(CopySrcReg)) + if (CopySrcReg.isVirtual()) return false; if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF)) @@ -955,8 +954,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, Register CopyDstReg = MI.getOperand(0).getReg(); // Check if any of the uses of the copy are invariant stores. - assert(Register::isVirtualRegister(CopyDstReg) && - "copy dst is not a virtual reg"); + assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg"); for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI)) @@ -1020,7 +1018,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { // A PHI may cause a copy to be inserted. @@ -1090,7 +1088,7 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { continue; --NumDefs; Register Reg = DefMO.getReg(); - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) continue; if (!TII->hasLowDefLatency(SchedModel, MI, i)) @@ -1183,7 +1181,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { if (!MO.isReg() || MO.isImplicit()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); @@ -1340,13 +1338,11 @@ bool MachineLICMBase::EliminateCSE( const MachineOperand &MO = MI->getOperand(i); // Physical registers may not differ here. - assert((!MO.isReg() || MO.getReg() == 0 || - !Register::isPhysicalRegister(MO.getReg()) || + assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() || MO.getReg() == Dup->getOperand(i).getReg()) && "Instructions with different phys regs are not identical!"); - if (MO.isReg() && MO.isDef() && - !Register::isPhysicalRegister(MO.getReg())) + if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical()) Defs.push_back(i); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp new file mode 100644 index 000000000000..c400ce190b46 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp @@ -0,0 +1,239 @@ +//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This simple pass removes any identical and redundant immediate or address +// loads to the same register. The immediate loads removed can originally be +// the result of rematerialization, while the addresses are redundant frame +// addressing anchor points created during Frame Indices elimination. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "machine-latecleanup" + +STATISTIC(NumRemoved, "Number of redundant instructions removed."); + +namespace { + +class MachineLateInstrsCleanup : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // Data structures to map regs to their definitions per MBB. + using Reg2DefMap = std::map<Register, MachineInstr*>; + std::vector<Reg2DefMap> RegDefs; + + // Walk through the instructions in MBB and remove any redundant + // instructions. + bool processBlock(MachineBasicBlock *MBB); + +public: + static char ID; // Pass identification, replacement for typeid + + MachineLateInstrsCleanup() : MachineFunctionPass(ID) { + initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } +}; + +} // end anonymous namespace + +char MachineLateInstrsCleanup::ID = 0; + +char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID; + +INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE, + "Machine Late Instructions Cleanup Pass", false, false) + +bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + RegDefs.clear(); + RegDefs.resize(MF.getNumBlockIDs()); + + // Visit all MBBs in an order that maximises the reuse from predecessors. + bool Changed = false; + ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); + for (MachineBasicBlock *MBB : RPOT) + Changed |= processBlock(MBB); + + return Changed; +} + +// Clear any previous kill flag on Reg found before I in MBB. Walk backwards +// in MBB and if needed continue in predecessors until a use/def of Reg is +// encountered. This seems to be faster in practice than tracking kill flags +// in a map. 
+static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + BitVector &VisitedPreds, + const TargetRegisterInfo *TRI) { + VisitedPreds.set(MBB->getNumber()); + while (I != MBB->begin()) { + --I; + bool Found = false; + for (auto &MO : I->operands()) + if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) { + if (MO.isDef()) + return; + if (MO.readsReg()) { + MO.setIsKill(false); + Found = true; // Keep going for an implicit kill of the super-reg. + } + } + if (Found) + return; + } + + // If an earlier def is not in MBB, continue in predecessors. + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + assert(!MBB->pred_empty() && "Predecessor def not found!"); + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (!VisitedPreds.test(Pred->getNumber())) + clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI); +} + +static void removeRedundantDef(MachineInstr *MI, + const TargetRegisterInfo *TRI) { + Register Reg = MI->getOperand(0).getReg(); + BitVector VisitedPreds(MI->getMF()->getNumBlockIDs()); + clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI); + MI->eraseFromParent(); + ++NumRemoved; +} + +// Return true if MI is a potential candidate for reuse/removal and if so +// also the register it defines in DefedReg. A candidate is a simple +// instruction that does not touch memory, has only one register definition +// and the only reg it may use is FrameReg. Typically this is an immediate +// load or a load-address instruction. +static bool isCandidate(const MachineInstr *MI, Register &DefedReg, + Register FrameReg) { + DefedReg = MCRegister::NoRegister; + bool SawStore = true; + if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() || + MI->isInlineAsm()) + return false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + if (MO.isDef()) { + if (i == 0 && !MO.isImplicit() && !MO.isDead()) + DefedReg = MO.getReg(); + else + return false; + } else if (MO.getReg() && MO.getReg() != FrameReg) + return false; + } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() || + MO.isGlobal() || MO.isSymbol())) + return false; + } + return DefedReg.isValid(); +} + +bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) { + bool Changed = false; + Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()]; + + // Find reusable definitions in the predecessor(s). + if (!MBB->pred_empty() && !MBB->isEHPad()) { + MachineBasicBlock *FirstPred = *MBB->pred_begin(); + for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()]) + if (llvm::all_of( + drop_begin(MBB->predecessors()), + [&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) { + auto PredDefI = RegDefs[Pred->getNumber()].find(Reg); + return PredDefI != RegDefs[Pred->getNumber()].end() && + DefMI->isIdenticalTo(*PredDefI->second); + })) { + MBBDefs[Reg] = DefMI; + LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in " + << printMBBReference(*MBB) << ": " << *DefMI;); + } + } + + // Process MBB. + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + Register FrameReg = TRI->getFrameRegister(*MF); + for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { + // If FrameReg is modified, no previous load-address instructions (using + // it) are valid. 
+ if (MI.modifiesRegister(FrameReg, TRI)) { + MBBDefs.clear(); + continue; + } + + Register DefedReg; + bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg); + + // Check for an earlier identical and reusable instruction. + if (IsCandidate) { + auto DefI = MBBDefs.find(DefedReg); + if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) { + LLVM_DEBUG(dbgs() << "Removing redundant instruction in " + << printMBBReference(*MBB) << ": " << MI;); + removeRedundantDef(&MI, TRI); + Changed = true; + continue; + } + } + + // Clear any entries in map that MI clobbers. + for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) { + Register Reg = DefI->first; + if (MI.modifiesRegister(Reg, TRI)) + DefI = MBBDefs.erase(DefI); + else + ++DefI; + } + + // Record this MI for potential later reuse. + if (IsCandidate) { + LLVM_DEBUG(dbgs() << "Found interesting instruction in " + << printMBBReference(*MBB) << ": " << MI;); + MBBDefs[DefedReg] = &MI; + } + } + + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp index 5cbded4b9264..fb3af385a0c1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -168,7 +168,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const { // An instruction that uses or defines a physical register can't e.g. be // hoisted, so mark this as not invariant. - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp index 23d55a5df9f5..a0c0166d06f0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -47,8 +47,6 @@ void MachineModuleInfo::initialize() { } void MachineModuleInfo::finalize() { - Personalities.clear(); - Context.reset(); // We don't clear the ExternalContext. @@ -89,16 +87,6 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, MachineModuleInfo::~MachineModuleInfo() { finalize(); } -/// \name Exception Handling -/// \{ - -void MachineModuleInfo::addPersonality(const Function *Personality) { - if (!llvm::is_contained(Personalities, Personality)) - Personalities.push_back(Personality); -} - -/// \} - MachineFunction * MachineModuleInfo::getMachineFunction(const Function &F) const { auto I = MachineFunctions.find(&F); @@ -118,6 +106,7 @@ MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) { // No pre-existing machine function, create a new one. const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F); MF = new MachineFunction(F, TM, STI, NextFnNum++, *this); + MF->initTargetMachineFunctionInfo(STI); // Update the set entry. 
I.first->second.reset(MF); } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp index 46ad1de78c46..0a7b12e9ccb9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StableHashing.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" @@ -28,6 +29,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" +#include <optional> using namespace llvm; @@ -45,6 +47,7 @@ static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) { return MF; return nullptr; } + static MachineFunction *getMFIfAvailable(MachineOperand &MO) { return const_cast<MachineFunction *>( getMFIfAvailable(const_cast<const MachineOperand &>(MO))); @@ -115,7 +118,7 @@ void MachineOperand::setIsDef(bool Val) { bool MachineOperand::isRenamable() const { assert(isReg() && "Wrong MachineOperand accessor"); - assert(Register::isPhysicalRegister(getReg()) && + assert(getReg().isPhysical() && "isRenamable should only be checked on physical registers"); if (!IsRenamable) return false; @@ -133,7 +136,7 @@ bool MachineOperand::isRenamable() const { void MachineOperand::setIsRenamable(bool Val) { assert(isReg() && "Wrong MachineOperand accessor"); - assert(Register::isPhysicalRegister(getReg()) && + assert(getReg().isPhysical() && "setIsRenamable should only be called on physical registers"); IsRenamable = Val; } @@ -233,6 +236,19 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, setTargetFlags(TargetFlags); } +void MachineOperand::ChangeToDbgInstrRef(unsigned InstrIdx, unsigned OpIdx, + unsigned TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into a DbgInstrRef"); + + removeRegFromUses(); + + OpKind = MO_DbgInstrRef; + setInstrRefInstrIndex(InstrIdx); + setInstrRefOpIndex(OpIdx); + setTargetFlags(TargetFlags); +} + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. 
@@ -323,10 +339,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return true; if (const MachineFunction *MF = getMFIfAvailable(*this)) { - // Calculate the size of the RegMask const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; - + unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); // Deep compare of the two RegMasks return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); } @@ -336,6 +350,9 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { } case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_DbgInstrRef: + return getInstrRefInstrIndex() == Other.getInstrRefInstrIndex() && + getInstrRefOpIndex() == Other.getInstrRefOpIndex(); case MachineOperand::MO_CFIIndex: return getCFIIndex() == Other.getCFIIndex(); case MachineOperand::MO_Metadata: @@ -382,12 +399,27 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_RegisterLiveOut: { + if (const MachineFunction *MF = getMFIfAvailable(MO)) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); + const uint32_t *RegMask = MO.getRegMask(); + std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize); + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_array(RegMaskHashes.data(), + RegMaskHashes.size())); + } + + assert(0 && "MachineOperand not associated with any MachineFunction"); + return hash_combine(MO.getType(), MO.getTargetFlags()); + } case MachineOperand::MO_Metadata: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); case MachineOperand::MO_MCSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + case MachineOperand::MO_DbgInstrRef: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getInstrRefInstrIndex(), MO.getInstrRefOpIndex()); case MachineOperand::MO_CFIIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); case MachineOperand::MO_IntrinsicID: @@ -445,7 +477,7 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, return; } - if (Optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true)) + if (std::optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true)) OS << printReg(*Reg, TRI); else OS << "<badreg>"; @@ -458,7 +490,7 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, printLLVMNameWithoutPrefix(OS, BB.getName()); return; } - Optional<int> Slot; + std::optional<int> Slot; if (const Function *F = BB.getParent()) { if (F == MST.getCurrentFunction()) { Slot = MST.getLocalSlot(&BB); @@ -519,7 +551,7 @@ static void printFrameIndex(raw_ostream& OS, int FrameIndex, bool IsFixed, void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index, const TargetRegisterInfo *TRI) { OS << "%subreg."; - if (TRI) + if (TRI && Index != 0 && Index < TRI->getNumSubRegIndices()) OS << TRI->getSubRegIndexName(Index); else OS << Index; @@ -736,15 +768,16 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint, const TargetIntrinsicInfo *IntrinsicInfo) const { tryToGetTargetInfo(*this, TRI, 
IntrinsicInfo); ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false, + print(OS, DummyMST, TypeToPrint, std::nullopt, /*PrintDef=*/false, /*IsStandalone=*/true, /*ShouldPrintRegisterTies=*/true, /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef, - bool IsStandalone, bool ShouldPrintRegisterTies, + LLT TypeToPrint, std::optional<unsigned> OpIdx, + bool PrintDef, bool IsStandalone, + bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const { @@ -767,13 +800,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "undef "; if (isEarlyClobber()) OS << "early-clobber "; - if (Register::isPhysicalRegister(getReg()) && isRenamable()) + if (getReg().isPhysical() && isRenamable()) OS << "renamable "; // isDebug() is exactly true for register operands of a DBG_VALUE. So we // simply infer it when parsing and do not need to print it. const MachineRegisterInfo *MRI = nullptr; - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { MRI = &MF->getRegInfo(); } @@ -788,7 +821,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << ".subreg" << SubReg; } // Print the register class / bank. - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) { @@ -928,6 +961,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, case MachineOperand::MO_MCSymbol: printSymbol(OS, *getMCSymbol()); break; + case MachineOperand::MO_DbgInstrRef: { + OS << "dbg-instr-ref(" << getInstrRefInstrIndex() << ", " + << getInstrRefOpIndex() << ')'; + break; + } case MachineOperand::MO_CFIIndex: { if (const MachineFunction *MF = getMFIfAvailable(*this)) printCFI(OS, MF->getFrameInstructions()[getCFIIndex()], TRI); @@ -1102,15 +1140,24 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "dereferenceable "; if (isInvariant()) OS << "invariant "; - if (getFlags() & MachineMemOperand::MOTargetFlag1) - OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1) - << "\" "; - if (getFlags() & MachineMemOperand::MOTargetFlag2) - OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2) - << "\" "; - if (getFlags() & MachineMemOperand::MOTargetFlag3) - OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3) - << "\" "; + if (TII) { + if (getFlags() & MachineMemOperand::MOTargetFlag1) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1) + << "\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag2) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2) + << "\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag3) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3) + << "\" "; + } else { + if (getFlags() & MachineMemOperand::MOTargetFlag1) + OS << "\"MOTargetFlag1\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag2) + OS << "\"MOTargetFlag2\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag3) + OS << "\"MOTargetFlag3\" "; + } assert((isLoad() || isStore()) && "machine memory operand must be a load or store (or both)"); diff --git 
a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 631768ec986c..1c31eba909e7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" +#include <optional> using namespace llvm; @@ -30,10 +31,10 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( /*SkipDebugLoc=*/true); } -Optional<uint64_t> +std::optional<uint64_t> MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) { if (!MBFI) - return None; + return std::nullopt; return MBFI->getBlockProfileCount(&MBB); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp index 5da68abc8f6a..c7ba66bd3678 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp @@ -727,7 +727,8 @@ MachineFunction *MachineOutliner::createOutlinedFunction( Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), Unit /* File */, 0 /* Line 0 is reserved for compiler-generated code. */, - DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */ + DB.createSubroutineType( + DB.getOrCreateTypeArray(std::nullopt)), /* void type */ 0, /* Line 0 is reserved for compiler-generated code. */ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, /* Outlined code is optimized code by definition. */ @@ -879,10 +880,13 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, // iterating over each Function in M. for (Function &F : M) { - // If there's nothing in F, then there's no reason to try and outline from - // it. - if (F.empty()) + if (F.hasFnAttribute("nooutline")) { + LLVM_DEBUG({ + dbgs() << "... Skipping function with nooutline attribute: " + << F.getName() << "\n"; + }); continue; + } // There's something in F. Check if it has a MachineFunction associated with // it. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp index 476dc059d2b5..039634f3d047 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp @@ -41,7 +41,7 @@ Error MachineFunctionPassManager::run(Module &M, // current pipeline is the top-level pipeline. Callbacks are not used after // current pipeline. 
PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) { - assert(any_isa<const MachineFunction *>(IR)); + assert(any_cast<const MachineFunction *>(&IR)); const MachineFunction *MF = any_cast<const MachineFunction *>(IR); assert(MF && "Machine function should be valid for printing"); std::string Banner = std::string("After ") + std::string(PassID); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index 52501ca7c871..adb630469003 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -43,6 +43,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CycleAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" @@ -84,9 +85,11 @@ #include <cstdint> #include <deque> #include <functional> +#include <iomanip> #include <iterator> #include <map> #include <memory> +#include <sstream> #include <tuple> #include <utility> #include <vector> @@ -121,6 +124,12 @@ static cl::opt<int> SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27)); +/// A command line argument to force pipeliner to use specified initial +/// interval. +static cl::opt<int> SwpForceII("pipeliner-force-ii", + cl::desc("Force pipeliner to use specified II."), + cl::Hidden, cl::init(-1)); + /// A command line argument to limit the number of stages in the pipeline. static cl::opt<int> SwpMaxStages("pipeliner-max-stages", @@ -172,6 +181,13 @@ cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, cl::init(true), cl::desc("Enable CopyToPhi DAG Mutation")); +/// A command line argument to force pipeliner to use specified issue +/// width. 
+cl::opt<int> SwpForceIssueWidth( + "pipeliner-force-issue-width", + cl::desc("Force pipeliner to use specified issue width."), cl::Hidden, + cl::init(-1)); + } // end namespace llvm unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; @@ -454,14 +470,18 @@ void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const { } void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) { - if (II_setByPragma > 0) + if (SwpForceII > 0) + MII = SwpForceII; + else if (II_setByPragma > 0) MII = II_setByPragma; else MII = std::max(ResMII, RecMII); } void SwingSchedulerDAG::setMAX_II() { - if (II_setByPragma > 0) + if (SwpForceII > 0) + MAX_II = SwpForceII; + else if (II_setByPragma > 0) MAX_II = II_setByPragma; else MAX_II = MII + 10; @@ -560,7 +580,7 @@ void SwingSchedulerDAG::schedule() { // check for node order issues checkValidNodeOrder(Circuits); - SMSchedule Schedule(Pass.MF); + SMSchedule Schedule(Pass.MF, this); Scheduled = schedulePipeline(Schedule); if (!Scheduled){ @@ -1002,7 +1022,7 @@ struct FuncUnitSorter { make_range(InstrItins->beginStage(SchedClass), InstrItins->endStage(SchedClass))) { InstrStage::FuncUnits funcUnits = IS.getUnits(); - unsigned numAlternatives = countPopulation(funcUnits); + unsigned numAlternatives = llvm::popcount(funcUnits); if (numAlternatives < min) { min = numAlternatives; F = funcUnits; @@ -1048,7 +1068,7 @@ struct FuncUnitSorter { make_range(InstrItins->beginStage(SchedClass), InstrItins->endStage(SchedClass))) { InstrStage::FuncUnits FuncUnits = IS.getUnits(); - if (countPopulation(FuncUnits) == 1) + if (llvm::popcount(FuncUnits) == 1) Resources[FuncUnits]++; } return; @@ -1093,72 +1113,9 @@ struct FuncUnitSorter { /// to add it to each existing DFA, until a legal space is found. If the /// instruction cannot be reserved in an existing DFA, we create a new one. unsigned SwingSchedulerDAG::calculateResMII() { - LLVM_DEBUG(dbgs() << "calculateResMII:\n"); - SmallVector<ResourceManager*, 8> Resources; - MachineBasicBlock *MBB = Loop.getHeader(); - Resources.push_back(new ResourceManager(&MF.getSubtarget())); - - // Sort the instructions by the number of available choices for scheduling, - // least to most. Use the number of critical resources as the tie breaker. - FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget()); - for (MachineInstr &MI : - llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator())) - FUS.calcCriticalResources(MI); - PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter> - FuncUnitOrder(FUS); - - for (MachineInstr &MI : - llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator())) - FuncUnitOrder.push(&MI); - - while (!FuncUnitOrder.empty()) { - MachineInstr *MI = FuncUnitOrder.top(); - FuncUnitOrder.pop(); - if (TII->isZeroCost(MI->getOpcode())) - continue; - // Attempt to reserve the instruction in an existing DFA. At least one - // DFA is needed for each cycle. 
- unsigned NumCycles = getSUnit(MI)->Latency; - unsigned ReservedCycles = 0; - SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin(); - SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end(); - LLVM_DEBUG({ - dbgs() << "Trying to reserve resource for " << NumCycles - << " cycles for \n"; - MI->dump(); - }); - for (unsigned C = 0; C < NumCycles; ++C) - while (RI != RE) { - if ((*RI)->canReserveResources(*MI)) { - (*RI)->reserveResources(*MI); - ++ReservedCycles; - break; - } - RI++; - } - LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles - << ", NumCycles:" << NumCycles << "\n"); - // Add new DFAs, if needed, to reserve resources. - for (unsigned C = ReservedCycles; C < NumCycles; ++C) { - LLVM_DEBUG(if (SwpDebugResource) dbgs() - << "NewResource created to reserve resources" - << "\n"); - ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget()); - assert(NewResource->canReserveResources(*MI) && "Reserve error."); - NewResource->reserveResources(*MI); - Resources.push_back(NewResource); - } - } - int Resmii = Resources.size(); - LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n"); - // Delete the memory for each of the DFAs that were created earlier. - for (ResourceManager *RI : Resources) { - ResourceManager *D = RI; - delete D; - } - Resources.clear(); - return Resmii; + ResourceManager RM(&MF.getSubtarget(), this); + return RM.calculateResMII(); } /// Calculate the recurrence-constrainted minimum initiation interval. @@ -1605,7 +1562,7 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isUse()) { Register Reg = MO.getReg(); - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) Uses.insert(Reg); else if (MRI.isAllocatable(Reg)) for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); @@ -1617,7 +1574,7 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, for (const MachineOperand &MO : SU->getInstr()->operands()) if (MO.isReg() && MO.isDef() && !MO.isDead()) { Register Reg = MO.getReg(); - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { if (!Uses.count(Reg)) LiveOutRegs.push_back(RegisterMaskPair(Reg, LaneBitmask::getNone())); @@ -2099,6 +2056,12 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { << ")\n"); if (scheduleFound) { + scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule); + if (!scheduleFound) + LLVM_DEBUG(dbgs() << "Target rejected schedule\n"); + } + + if (scheduleFound) { Schedule.finalizeSchedule(this); Pass.ORE->emit([&]() { return MachineOptimizationRemarkAnalysis( @@ -2314,20 +2277,28 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, assert(!OffsetSIsScalable && !OffsetDIsScalable && "Expected offsets to be byte offsets"); - if (!BaseOpS->isIdenticalTo(*BaseOpD)) + MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg()); + MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg()); + if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI()) + return true; + + unsigned InitValS = 0; + unsigned LoopValS = 0; + unsigned InitValD = 0; + unsigned LoopValD = 0; + getPhiRegs(*DefS, BB, InitValS, LoopValS); + getPhiRegs(*DefD, BB, InitValD, LoopValD); + MachineInstr *InitDefS = MRI.getVRegDef(InitValS); + MachineInstr *InitDefD = MRI.getVRegDef(InitValD); + + if (!InitDefS->isIdenticalTo(*InitDefD)) return true; // Check that the base register is incremented by a constant value for each // iteration. 
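// A rough standalone sketch (not part of the patch) of why the check above
// walks both bases back to their PHIs: if both address streams start from the
// same initial value and step by the same constant increment D, the distance
// between the two accesses is identical in every iteration, so a fixed
// offset/size comparison can decide whether the dependence is truly loop
// carried. All names and numbers below are made up for illustration.
#include <cstdio>

int main() {
  const long Init = 1000;        // shared initial base (identical init PHI defs)
  const long D = 16;             // constant per-iteration increment
  const long OffS = 0, OffD = 8; // instruction-level offsets from the base
  for (int Iter = 0; Iter < 3; ++Iter) {
    long AddrS = Init + Iter * D + OffS;
    long AddrD = Init + Iter * D + OffD;
    std::printf("iter %d: S=%ld D=%ld distance=%ld\n", Iter, AddrS, AddrD,
                AddrD - AddrS); // distance stays 8 in every iteration
  }
  return 0;
}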
- MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); - if (!Def || !Def->isPHI()) - return true; - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(*Def, BB, InitVal, LoopVal); - MachineInstr *LoopDef = MRI.getVRegDef(LoopVal); + MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS); int D = 0; - if (!LoopDef || !TII->getIncrementValue(*LoopDef, D)) + if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D)) return true; uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize(); @@ -2369,28 +2340,15 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { for (int curCycle = StartCycle; curCycle != termCycle; forward ? ++curCycle : --curCycle) { - // Add the already scheduled instructions at the specified cycle to the - // DFA. - ProcItinResources.clearResources(); - for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II); - checkCycle <= LastCycle; checkCycle += II) { - std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle]; - - for (SUnit *CI : cycleInstrs) { - if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode())) - continue; - assert(ProcItinResources.canReserveResources(*CI->getInstr()) && - "These instructions have already been scheduled."); - ProcItinResources.reserveResources(*CI->getInstr()); - } - } if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) || - ProcItinResources.canReserveResources(*SU->getInstr())) { + ProcItinResources.canReserveResources(*SU, curCycle)) { LLVM_DEBUG({ dbgs() << "\tinsert at cycle " << curCycle << " "; SU->getInstr()->dump(); }); + if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) + ProcItinResources.reserveResources(*SU, curCycle); ScheduledInstrs[curCycle].push_back(SU); InstrToCycle.insert(std::make_pair(SU, curCycle)); if (curCycle > LastCycle) @@ -2542,7 +2500,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; ++I, ++Pos) { for (MachineOperand &MO : MI->operands()) { - if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; Register Reg = MO.getReg(); @@ -3019,6 +2977,26 @@ void SMSchedule::print(raw_ostream &os) const { LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); } LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); } +void ResourceManager::dumpMRT() const { + LLVM_DEBUG({ + if (UseDFA) + return; + std::stringstream SS; + SS << "MRT:\n"; + SS << std::setw(4) << "Slot"; + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) + SS << std::setw(3) << I; + SS << std::setw(7) << "#Mops" + << "\n"; + for (int Slot = 0; Slot < InitiationInterval; ++Slot) { + SS << std::setw(4) << Slot; + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) + SS << std::setw(3) << MRT[Slot][I]; + SS << std::setw(7) << NumScheduledMops[Slot] << "\n"; + } + dbgs() << SS.str(); + }); +} #endif void ResourceManager::initProcResourceVectors( @@ -3063,97 +3041,244 @@ void ResourceManager::initProcResourceVectors( }); } -bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const { - +bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) { LLVM_DEBUG({ if (SwpDebugResource) dbgs() << "canReserveResources:\n"; }); if (UseDFA) - return DFAResources->canReserveResources(MID); + return DFAResources[positiveModulo(Cycle, InitiationInterval)] + ->canReserveResources(&SU.getInstr()->getDesc()); - unsigned InsnClass = MID->getSchedClass(); - const 
MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass); + const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU); if (!SCDesc->isValid()) { LLVM_DEBUG({ dbgs() << "No valid Schedule Class Desc for schedClass!\n"; - dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; + dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n"; }); return true; } - const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc); - const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc); - for (; I != E; ++I) { - if (!I->Cycles) - continue; - const MCProcResourceDesc *ProcResource = - SM.getProcResource(I->ProcResourceIdx); - unsigned NumUnits = ProcResource->NumUnits; - LLVM_DEBUG({ - if (SwpDebugResource) - dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n", - ProcResource->Name, I->ProcResourceIdx, - ProcResourceCount[I->ProcResourceIdx], NumUnits, - I->Cycles); - }); - if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits) - return false; - } - LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";); - return true; + reserveResources(SCDesc, Cycle); + bool Result = !isOverbooked(); + unreserveResources(SCDesc, Cycle); + + LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";); + return Result; } -void ResourceManager::reserveResources(const MCInstrDesc *MID) { +void ResourceManager::reserveResources(SUnit &SU, int Cycle) { LLVM_DEBUG({ if (SwpDebugResource) dbgs() << "reserveResources:\n"; }); if (UseDFA) - return DFAResources->reserveResources(MID); + return DFAResources[positiveModulo(Cycle, InitiationInterval)] + ->reserveResources(&SU.getInstr()->getDesc()); - unsigned InsnClass = MID->getSchedClass(); - const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass); + const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU); if (!SCDesc->isValid()) { LLVM_DEBUG({ dbgs() << "No valid Schedule Class Desc for schedClass!\n"; - dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; + dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n"; }); return; } - for (const MCWriteProcResEntry &PRE : - make_range(STI->getWriteProcResBegin(SCDesc), - STI->getWriteProcResEnd(SCDesc))) { - if (!PRE.Cycles) - continue; - ++ProcResourceCount[PRE.ProcResourceIdx]; - LLVM_DEBUG({ - if (SwpDebugResource) { - const MCProcResourceDesc *ProcResource = - SM.getProcResource(PRE.ProcResourceIdx); - dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n", - ProcResource->Name, PRE.ProcResourceIdx, - ProcResourceCount[PRE.ProcResourceIdx], - ProcResource->NumUnits, PRE.Cycles); - } - }); - } + + reserveResources(SCDesc, Cycle); + LLVM_DEBUG({ - if (SwpDebugResource) + if (SwpDebugResource) { + dumpMRT(); dbgs() << "reserveResources: done!\n\n"; + } }); } -bool ResourceManager::canReserveResources(const MachineInstr &MI) const { - return canReserveResources(&MI.getDesc()); +void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc, + int Cycle) { + assert(!UseDFA); + for (const MCWriteProcResEntry &PRE : make_range( + STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc))) + for (int C = Cycle; C < Cycle + PRE.Cycles; ++C) + ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx]; + + for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C) + ++NumScheduledMops[positiveModulo(C, InitiationInterval)]; +} + +void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc, + int Cycle) { + assert(!UseDFA); + for (const MCWriteProcResEntry &PRE : make_range( + STI->getWriteProcResBegin(SCDesc), 
STI->getWriteProcResEnd(SCDesc))) + for (int C = Cycle; C < Cycle + PRE.Cycles; ++C) + --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx]; + + for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C) + --NumScheduledMops[positiveModulo(C, InitiationInterval)]; } -void ResourceManager::reserveResources(const MachineInstr &MI) { - return reserveResources(&MI.getDesc()); +bool ResourceManager::isOverbooked() const { + assert(!UseDFA); + for (int Slot = 0; Slot < InitiationInterval; ++Slot) { + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc *Desc = SM.getProcResource(I); + if (MRT[Slot][I] > Desc->NumUnits) + return true; + } + if (NumScheduledMops[Slot] > IssueWidth) + return true; + } + return false; +} + +int ResourceManager::calculateResMIIDFA() const { + assert(UseDFA); + + // Sort the instructions by the number of available choices for scheduling, + // least to most. Use the number of critical resources as the tie breaker. + FuncUnitSorter FUS = FuncUnitSorter(*ST); + for (SUnit &SU : DAG->SUnits) + FUS.calcCriticalResources(*SU.getInstr()); + PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter> + FuncUnitOrder(FUS); + + for (SUnit &SU : DAG->SUnits) + FuncUnitOrder.push(SU.getInstr()); + + SmallVector<std::unique_ptr<DFAPacketizer>, 8> Resources; + Resources.push_back( + std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST))); + + while (!FuncUnitOrder.empty()) { + MachineInstr *MI = FuncUnitOrder.top(); + FuncUnitOrder.pop(); + if (TII->isZeroCost(MI->getOpcode())) + continue; + + // Attempt to reserve the instruction in an existing DFA. At least one + // DFA is needed for each cycle. + unsigned NumCycles = DAG->getSUnit(MI)->Latency; + unsigned ReservedCycles = 0; + auto *RI = Resources.begin(); + auto *RE = Resources.end(); + LLVM_DEBUG({ + dbgs() << "Trying to reserve resource for " << NumCycles + << " cycles for \n"; + MI->dump(); + }); + for (unsigned C = 0; C < NumCycles; ++C) + while (RI != RE) { + if ((*RI)->canReserveResources(*MI)) { + (*RI)->reserveResources(*MI); + ++ReservedCycles; + break; + } + RI++; + } + LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles + << ", NumCycles:" << NumCycles << "\n"); + // Add new DFAs, if needed, to reserve resources. + for (unsigned C = ReservedCycles; C < NumCycles; ++C) { + LLVM_DEBUG(if (SwpDebugResource) dbgs() + << "NewResource created to reserve resources" + << "\n"); + auto *NewResource = TII->CreateTargetScheduleState(*ST); + assert(NewResource->canReserveResources(*MI) && "Reserve error."); + NewResource->reserveResources(*MI); + Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource)); + } + } + + int Resmii = Resources.size(); + LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n"); + return Resmii; } -void ResourceManager::clearResources() { +int ResourceManager::calculateResMII() const { if (UseDFA) - return DFAResources->clearResources(); - std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0); + return calculateResMIIDFA(); + + // Count each resource consumption and divide it by the number of units. + // ResMII is the max value among them. 
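// A minimal standalone sketch (not part of the patch) of the bound the
// non-DFA path below computes: total micro-ops are capped by the issue width,
// each processor resource by its unit count, and ResMII is the largest of
// those ceilings. The counts here are invented purely for illustration.
#include <algorithm>
#include <cstdio>

int main() {
  const int IssueWidth = 4;
  const int NumMops = 10;           // hypothetical micro-op total
  const int Consumed[] = {7, 9, 2}; // hypothetical cycles used per resource kind
  const int Units[] = {2, 3, 1};    // units available per resource kind
  int ResMII = (NumMops + IssueWidth - 1) / IssueWidth; // ceil(10/4) = 3
  for (int I = 0; I < 3; ++I)
    ResMII = std::max(ResMII, (Consumed[I] + Units[I] - 1) / Units[I]);
  std::printf("ResMII = %d\n", ResMII); // 4 here: resource 0 needs ceil(7/2) cycles
  return 0;
}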
+ + int NumMops = 0; + SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds()); + for (SUnit &SU : DAG->SUnits) { + if (TII->isZeroCost(SU.getInstr()->getOpcode())) + continue; + + const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU); + if (!SCDesc->isValid()) + continue; + + LLVM_DEBUG({ + if (SwpDebugResource) { + DAG->dumpNode(SU); + dbgs() << " #Mops: " << SCDesc->NumMicroOps << "\n" + << " WriteProcRes: "; + } + }); + NumMops += SCDesc->NumMicroOps; + for (const MCWriteProcResEntry &PRE : + make_range(STI->getWriteProcResBegin(SCDesc), + STI->getWriteProcResEnd(SCDesc))) { + LLVM_DEBUG({ + if (SwpDebugResource) { + const MCProcResourceDesc *Desc = + SM.getProcResource(PRE.ProcResourceIdx); + dbgs() << Desc->Name << ": " << PRE.Cycles << ", "; + } + }); + ResourceCount[PRE.ProcResourceIdx] += PRE.Cycles; + } + LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n"); + } + + int Result = (NumMops + IssueWidth - 1) / IssueWidth; + LLVM_DEBUG({ + if (SwpDebugResource) + dbgs() << "#Mops: " << NumMops << ", " + << "IssueWidth: " << IssueWidth << ", " + << "Cycles: " << Result << "\n"; + }); + + LLVM_DEBUG({ + if (SwpDebugResource) { + std::stringstream SS; + SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10) + << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles" + << "\n"; + dbgs() << SS.str(); + } + }); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc *Desc = SM.getProcResource(I); + int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits; + LLVM_DEBUG({ + if (SwpDebugResource) { + std::stringstream SS; + SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10) + << Desc->NumUnits << std::setw(10) << ResourceCount[I] + << std::setw(10) << Cycles << "\n"; + dbgs() << SS.str(); + } + }); + if (Cycles > Result) + Result = Cycles; + } + return Result; +} + +void ResourceManager::init(int II) { + InitiationInterval = II; + DFAResources.clear(); + DFAResources.resize(II); + for (auto &I : DFAResources) + I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST)); + MRT.clear(); + MRT.resize(II, SmallVector<uint64_t>(SM.getNumProcResourceKinds())); + NumScheduledMops.clear(); + NumScheduledMops.resize(II); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 511bb80052c2..1ad08e19feae 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -48,6 +48,7 @@ MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) RegAllocHints.reserve(256); UsedPhysRegMask.resize(NumRegs); PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); + TheDelegates.clear(); } /// setRegClass - Set the register class of the specified virtual register. @@ -79,10 +80,10 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg, return NewRC; } -const TargetRegisterClass * -MachineRegisterInfo::constrainRegClass(Register Reg, - const TargetRegisterClass *RC, - unsigned MinNumRegs) { +const TargetRegisterClass *MachineRegisterInfo::constrainRegClass( + Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) { + if (Reg.isPhysical()) + return nullptr; return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); } @@ -162,8 +163,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, // New virtual register number. 
Register Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = RegClass; - if (TheDelegate) - TheDelegate->MRI_NoteNewVirtualRegister(Reg); + noteNewVirtualRegister(Reg); return Reg; } @@ -172,8 +172,7 @@ Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, Register Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = VRegInfo[VReg].first; setType(Reg, getType(VReg)); - if (TheDelegate) - TheDelegate->MRI_NoteNewVirtualRegister(Reg); + noteCloneVirtualRegister(Reg, VReg); return Reg; } @@ -189,8 +188,7 @@ MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) { // FIXME: Should we use a dummy register class? VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr); setType(Reg, Ty); - if (TheDelegate) - TheDelegate->MRI_NoteNewVirtualRegister(Reg); + noteNewVirtualRegister(Reg); return Reg; } @@ -204,7 +202,11 @@ void MachineRegisterInfo::clearVirtRegs() { if (!VRegInfo[Reg].second) continue; verifyUseList(Reg); - llvm_unreachable("Remaining virtual register operands"); + errs() << "Remaining virtual register " + << printReg(Reg, getTargetRegisterInfo()) << "...\n"; + for (MachineInstr &MI : reg_instructions(Reg)) + errs() << "...in instruction: " << MI << "\n"; + std::abort(); } #endif VRegInfo.clear(); @@ -382,7 +384,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) { // TODO: This could be more efficient by bulk changing the operands. for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) { - if (Register::isPhysicalRegister(ToReg)) { + if (ToReg.isPhysical()) { O.substPhysReg(ToReg, *TRI); } else { O.setReg(ToReg); @@ -420,6 +422,12 @@ bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const { return hasSingleElement(use_nodbg_instructions(RegNo)); } +bool MachineRegisterInfo::hasAtMostUserInstrs(Register Reg, + unsigned MaxUsers) const { + return hasNItemsOrLess(use_instr_nodbg_begin(Reg), use_instr_nodbg_end(), + MaxUsers); +} + /// clearKillFlags - Iterate over all the uses of the given register and /// clear the kill flag from the MachineOperand. This function is used by /// optimization passes which extend register lifetimes and need only @@ -488,7 +496,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(Register Reg) const { // Lane masks are only defined for vregs. 
- assert(Register::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); const TargetRegisterClass &TRC = *getRegClass(Reg); return TRC.getLaneMask(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp index 01cea85ecc7c..6de8f8da9254 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -21,20 +21,52 @@ using namespace llvm; -MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) { - return &F.front(); -} +const Register MachineSSAContext::ValueRefNull{}; void MachineSSAContext::setFunction(MachineFunction &Fn) { MF = &Fn; RegInfo = &MF->getRegInfo(); } -Printable MachineSSAContext::print(MachineBasicBlock *Block) const { +MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) { + return &F.front(); +} + +void MachineSSAContext::appendBlockTerms( + SmallVectorImpl<const MachineInstr *> &terms, + const MachineBasicBlock &block) { + for (auto &T : block.terminators()) + terms.push_back(&T); +} + +void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs, + const MachineBasicBlock &block) { + for (const MachineInstr &instr : block.instrs()) { + for (const MachineOperand &op : instr.operands()) { + if (op.isReg() && op.isDef()) + defs.push_back(op.getReg()); + } + } +} + +/// Get the defining block of a value. +MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const { + if (!value) + return nullptr; + return RegInfo->getVRegDef(value)->getParent(); +} + +bool MachineSSAContext::isConstantValuePhi(const MachineInstr &Phi) { + return Phi.isConstantValuePHI(); +} + +Printable MachineSSAContext::print(const MachineBasicBlock *Block) const { + if (!Block) + return Printable([](raw_ostream &Out) { Out << "<nullptr>"; }); return Printable([Block](raw_ostream &Out) { Block->printName(Out); }); } -Printable MachineSSAContext::print(MachineInstr *I) const { +Printable MachineSSAContext::print(const MachineInstr *I) const { return Printable([I](raw_ostream &Out) { I->print(Out); }); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index e5cd46268600..5ab5a40e7574 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -95,9 +95,15 @@ cl::opt<bool> ViewMISchedDAGs( cl::desc("Pop up a window to show MISched dags after they are processed")); cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs")); +cl::opt<bool> MISchedDumpReservedCycles( + "misched-dump-reserved-cycles", cl::Hidden, cl::init(false), + cl::desc("Dump resource usage at schedule boundary.")); #else const bool ViewMISchedDAGs = false; const bool PrintDAGs = false; +#ifdef LLVM_ENABLE_DUMP +const bool MISchedDumpReservedCycles = false; +#endif // LLVM_ENABLE_DUMP #endif // NDEBUG } // end namespace llvm @@ -955,7 +961,7 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; // Ignore re-defs. @@ -1116,7 +1122,7 @@ void ScheduleDAGMILive::updatePressureDiffs( for (const RegisterMaskPair &P : LiveUses) { Register Reg = P.RegUnit; /// FIXME: Currently assuming single-use physregs. 
- if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (ShouldTrackLaneMasks) { @@ -1340,7 +1346,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // Visit each live out vreg def to find def/use pairs that cross iterations. for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { Register Reg = P.RegUnit; - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; const LiveInterval &LI = LIS->getInterval(Reg); const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); @@ -1823,12 +1829,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check for pure vreg copies. const MachineOperand &SrcOp = Copy->getOperand(1); Register SrcReg = SrcOp.getReg(); - if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) + if (!SrcReg.isVirtual() || !SrcOp.readsReg()) return; const MachineOperand &DstOp = Copy->getOperand(0); Register DstReg = DstOp.getReg(); - if (!Register::isVirtualRegister(DstReg) || DstOp.isDead()) + if (!DstReg.isVirtual() || DstOp.isDead()) return; // Check if either the dest or source is local. If it's live across a back @@ -2589,6 +2595,28 @@ SUnit *SchedBoundary::pickOnlyChoice() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + +/// Dump the content of the \ref ReservedCycles vector for the +/// resources that are used in the basic block. +/// +LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const { + if (!SchedModel->hasInstrSchedModel()) + return; + + unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); + unsigned StartIdx = 0; + + for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) { + const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits; + std::string ResName = SchedModel->getResourceName(ResIdx); + for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) { + dbgs() << ResName << "(" << UnitIdx + << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n"; + } + StartIdx += NumUnits; + } +} + // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { @@ -2611,6 +2639,8 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { << "\n ExpectedLatency: " << ExpectedLatency << "c\n" << (IsResourceLimited ? " - Resource" : " - Latency") << " limited.\n"; + if (MISchedDumpReservedCycles) + dumpReservedCycles(); } #endif @@ -3102,12 +3132,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) { unsigned UnscheduledOper = isTop ? 0 : 1; // If we have already scheduled the physreg produce/consumer, immediately // schedule the copy. - if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg())) + if (MI->getOperand(ScheduledOper).getReg().isPhysical()) return 1; // If the physreg is at the boundary, defer it. Otherwise schedule it // immediately to free the dependent. We can hoist the copy later. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; - if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg())) + if (MI->getOperand(UnscheduledOper).getReg().isPhysical()) return AtBoundary ? -1 : 1; } @@ -3117,7 +3147,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) { // physical registers. 
bool DoBias = true; for (const MachineOperand &Op : MI->defs()) { - if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) { + if (Op.isReg() && !Op.getReg().isPhysical()) { DoBias = false; break; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 0568bc6a4600..8429d468254a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -275,8 +275,8 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, Register SrcReg = MI.getOperand(1).getReg(); Register DstReg = MI.getOperand(0).getReg(); - if (!Register::isVirtualRegister(SrcReg) || - !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg)) + if (!SrcReg.isVirtual() || !DstReg.isVirtual() || + !MRI->hasOneNonDBGUse(SrcReg)) return false; const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); @@ -309,7 +309,7 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const { - assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); + assert(Reg.isVirtual() && "Only makes sense for vregs"); // Ignore debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) @@ -611,7 +611,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, // We don't move live definitions of physical registers, // so sinking their uses won't enable any opportunities. - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) continue; // If this instruction is the only user of a virtual register, @@ -805,7 +805,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (Reg == 0) continue; - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { if (MO.isUse() && (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) continue; @@ -910,7 +910,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, Register Reg = MO.getReg(); if (Reg == 0) continue; - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -1323,7 +1323,7 @@ static bool blockPrologueInterferes(MachineBasicBlock *BB, if (!Reg) continue; if (MO.isUse()) { - if (Register::isPhysicalRegister(Reg) && + if (Reg.isPhysical() && (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg)))) continue; if (PI->modifiesRegister(Reg, TRI)) @@ -1387,7 +1387,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, if (!MO.isReg() || MO.isUse()) continue; Register Reg = MO.getReg(); - if (Reg == 0 || !Register::isPhysicalRegister(Reg)) + if (Reg == 0 || !Reg.isPhysical()) continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; @@ -1779,11 +1779,11 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // We must sink this DBG_VALUE if its operand is sunk. To avoid searching // for DBG_VALUEs later, record them when they're encountered. 
- if (MI.isDebugValue()) { + if (MI.isDebugValue() && !MI.isDebugRef()) { SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits; bool IsValid = true; for (MachineOperand &MO : MI.debug_operands()) { - if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && MO.getReg().isPhysical()) { // Bail if we can already tell the sink would be rejected, rather // than needlessly accumulating lots of DBG_VALUEs. if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp index b546a5082b07..9628e4c5aeb5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp @@ -63,7 +63,7 @@ STATISTIC(StableHashBailingMetadataUnsupported, stable_hash llvm::stableHashValue(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: - if (Register::isVirtualRegister(MO.getReg())) { + if (MO.getReg().isVirtual()) { const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo(); SmallVector<unsigned> DefOpcodes; for (auto &Def : MRI.def_instructions(MO.getReg())) @@ -119,8 +119,26 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { stable_hash_combine_string(MO.getSymbolName())); case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_RegisterLiveOut: { + if (const MachineInstr *MI = MO.getParent()) { + if (const MachineBasicBlock *MBB = MI->getParent()) { + if (const MachineFunction *MF = MBB->getParent()) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = + MachineOperand::getRegMaskSize(TRI->getNumRegs()); + const uint32_t *RegMask = MO.getRegMask(); + std::vector<llvm::stable_hash> RegMaskHashes(RegMask, + RegMask + RegMaskSize); + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_array(RegMaskHashes.data(), + RegMaskHashes.size())); + } + } + } + + assert(0 && "MachineOperand not associated with any MachineFunction"); + return hash_combine(MO.getType(), MO.getTargetFlags()); + } case MachineOperand::MO_ShuffleMask: { std::vector<llvm::stable_hash> ShuffleMaskHashes; @@ -147,6 +165,9 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { case MachineOperand::MO_Predicate: return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); + case MachineOperand::MO_DbgInstrRef: + return stable_hash_combine(MO.getType(), MO.getInstrRefInstrIndex(), + MO.getInstrRefOpIndex()); } llvm_unreachable("Invalid machine operand type"); } @@ -164,8 +185,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, HashComponents.push_back(MI.getOpcode()); HashComponents.push_back(MI.getFlags()); for (const MachineOperand &MO : MI.operands()) { - if (!HashVRegs && MO.isReg() && MO.isDef() && - Register::isVirtualRegister(MO.getReg())) + if (!HashVRegs && MO.isReg() && MO.isDef() && MO.getReg().isVirtual()) continue; // Skip virtual register defs. 
if (MO.isCPI()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 715e5da26989..5c6efd4af074 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -9,7 +9,6 @@ #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -147,7 +146,7 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { "getResources() must be called before getProcResourceCycles()"); unsigned PRKinds = SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); - return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds); + return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds); } //===----------------------------------------------------------------------===// @@ -265,7 +264,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceDepths(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); - return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); + return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); } /// Get an array of processor resource heights for MBB. Indexed by processor @@ -278,7 +277,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceHeights(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); - return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); + return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); } //===----------------------------------------------------------------------===// @@ -352,7 +351,7 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { // Select the preferred successor for MBB. const MachineBasicBlock* MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { - if (MBB->pred_empty()) + if (MBB->succ_empty()) return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); const MachineBasicBlock *Best = nullptr; @@ -449,7 +448,7 @@ public: void finishPostorder(const MachineBasicBlock*) {} - bool insertEdge(Optional<const MachineBasicBlock *> From, + bool insertEdge(std::optional<const MachineBasicBlock *> From, const MachineBasicBlock *To) { // Skip already visited To blocks. 
MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; @@ -664,7 +663,7 @@ static bool getDataDeps(const MachineInstr &UseMI, Register Reg = MO.getReg(); if (!Reg) continue; - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { HasPhysRegs = true; continue; } @@ -903,7 +902,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isPhysicalRegister(Reg)) + if (!Reg.isPhysical()) continue; if (MO.readsReg()) ReadOps.push_back(MI.getOperandNo(MOI)); @@ -980,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); Register Reg = DefMI->getOperand(DefOp).getReg(); - assert(Register::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); const MachineBasicBlock *DefMBB = DefMI->getParent(); // Reg is live-in to all blocks in Trace that follow DefMBB. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp new file mode 100644 index 000000000000..2fe5e40a58c2 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp @@ -0,0 +1,223 @@ +//===- MachineUniformityAnalysis.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineUniformityAnalysis.h" +#include "llvm/ADT/GenericUniformityImpl.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +template <> +bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs( + const MachineInstr &I) const { + for (auto &op : I.operands()) { + if (!op.isReg() || !op.isDef()) + continue; + if (isDivergent(op.getReg())) + return true; + } + return false; +} + +template <> +bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::markDefsDivergent( + const MachineInstr &Instr, bool AllDefsDivergent) { + bool insertedDivergent = false; + const auto &MRI = F.getRegInfo(); + const auto &TRI = *MRI.getTargetRegisterInfo(); + for (auto &op : Instr.operands()) { + if (!op.isReg() || !op.isDef()) + continue; + if (!op.getReg().isVirtual()) + continue; + assert(!op.getSubReg()); + if (!AllDefsDivergent) { + auto *RC = MRI.getRegClassOrNull(op.getReg()); + if (RC && !TRI.isDivergentRegClass(RC)) + continue; + } + insertedDivergent |= markDivergent(op.getReg()); + } + return insertedDivergent; +} + +template <> +void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() { + const auto &InstrInfo = *F.getSubtarget().getInstrInfo(); + + for (const MachineBasicBlock &block : F) { + for (const MachineInstr &instr : block) { + auto uniformity = InstrInfo.getInstructionUniformity(instr); + if (uniformity == InstructionUniformity::AlwaysUniform) { + addUniformOverride(instr); + continue; + } + + if (uniformity == InstructionUniformity::NeverUniform) { + markDefsDivergent(instr, /* AllDefsDivergent = */ false); + } + } + } +} + 
+template <> +void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers( + Register Reg) { + const auto &RegInfo = F.getRegInfo(); + for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) { + if (isAlwaysUniform(UserInstr)) + continue; + if (markDivergent(UserInstr)) + Worklist.push_back(&UserInstr); + } +} + +template <> +void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers( + const MachineInstr &Instr) { + assert(!isAlwaysUniform(Instr)); + if (Instr.isTerminator()) + return; + for (const MachineOperand &op : Instr.operands()) { + if (op.isReg() && op.isDef() && op.getReg().isVirtual()) + pushUsers(op.getReg()); + } +} + +template <> +bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle( + const MachineInstr &I, const MachineCycle &DefCycle) const { + assert(!isAlwaysUniform(I)); + for (auto &Op : I.operands()) { + if (!Op.isReg() || !Op.readsReg()) + continue; + auto Reg = Op.getReg(); + assert(Reg.isVirtual()); + auto *Def = F.getRegInfo().getVRegDef(Reg); + if (DefCycle.contains(Def->getParent())) + return true; + } + return false; +} + +// This ensures explicit instantiation of +// GenericUniformityAnalysisImpl::ImplDeleter::operator() +template class llvm::GenericUniformityInfo<MachineSSAContext>; +template struct llvm::GenericUniformityAnalysisImplDeleter< + llvm::GenericUniformityAnalysisImpl<MachineSSAContext>>; + +MachineUniformityInfo +llvm::computeMachineUniformityInfo(MachineFunction &F, + const MachineCycleInfo &cycleInfo, + const MachineDomTree &domTree) { + assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!"); + return MachineUniformityInfo(F, domTree, cycleInfo); +} + +namespace { + +/// Legacy analysis pass which computes a \ref MachineUniformityInfo. 
+class MachineUniformityAnalysisPass : public MachineFunctionPass { + MachineUniformityInfo UI; + +public: + static char ID; + + MachineUniformityAnalysisPass(); + + MachineUniformityInfo &getUniformityInfo() { return UI; } + const MachineUniformityInfo &getUniformityInfo() const { return UI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + + // TODO: verify analysis +}; + +class MachineUniformityInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineUniformityInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace + +char MachineUniformityAnalysisPass::ID = 0; + +MachineUniformityAnalysisPass::MachineUniformityAnalysisPass() + : MachineFunctionPass(ID) { + initializeMachineUniformityAnalysisPassPass(*PassRegistry::getPassRegistry()); +} + +INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity", + "Machine Uniformity Info Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity", + "Machine Uniformity Info Analysis", true, true) + +void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineCycleInfoWrapperPass>(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) { + auto &DomTree = getAnalysis<MachineDominatorTree>().getBase(); + auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo(); + UI = computeMachineUniformityInfo(MF, CI, DomTree); + return false; +} + +void MachineUniformityAnalysisPass::print(raw_ostream &OS, + const Module *) const { + OS << "MachineUniformityInfo for function: " << UI.getFunction().getName() + << "\n"; + UI.print(OS); +} + +char MachineUniformityInfoPrinterPass::ID = 0; + +MachineUniformityInfoPrinterPass::MachineUniformityInfoPrinterPass() + : MachineFunctionPass(ID) { + initializeMachineUniformityInfoPrinterPassPass( + *PassRegistry::getPassRegistry()); +} + +INITIALIZE_PASS_BEGIN(MachineUniformityInfoPrinterPass, + "print-machine-uniformity", + "Print Machine Uniformity Info Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass) +INITIALIZE_PASS_END(MachineUniformityInfoPrinterPass, + "print-machine-uniformity", + "Print Machine Uniformity Info Analysis", true, true) + +void MachineUniformityInfoPrinterPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineUniformityAnalysisPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineUniformityInfoPrinterPass::runOnMachineFunction( + MachineFunction &F) { + auto &UI = getAnalysis<MachineUniformityAnalysisPass>(); + UI.print(errs()); + return false; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp index 93e68918b632..ddd5a027c2cd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" +#include 
"llvm/Support/ModRef.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> @@ -294,6 +295,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addUsedIfAvailable<LiveStacks>(); + AU.addUsedIfAvailable<LiveVariables>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -564,7 +566,7 @@ void MachineVerifier::report_context_vreg(Register VReg) const { } void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const { - if (Register::isVirtualRegister(VRegOrUnit)) { + if (VRegOrUnit.isVirtual()) { report_context_vreg(VRegOrUnit); } else { errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; @@ -632,6 +634,13 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } + if (MBB->isIRBlockAddressTaken()) { + if (!MBB->getAddressTakenIRBlock()->hasAddressTaken()) + report("ir-block-address-taken is associated with basic block not used by " + "a blockaddress.", + MBB); + } + // Count the number of landing pad successors. SmallPtrSet<const MachineBasicBlock*, 4> LandingPadSuccs; for (const auto *succ : MBB->successors()) { @@ -821,8 +830,12 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { - report("Non-terminator instruction after the first terminator", MI); - errs() << "First terminator was:\t" << *FirstTerminator; + // For GlobalISel, G_INVOKE_REGION_START is a terminator that we allow to + // precede non-terminators. + if (FirstTerminator->getOpcode() != TargetOpcode::G_INVOKE_REGION_START) { + report("Non-terminator instruction after the first terminator", MI); + errs() << "First terminator was:\t" << *FirstTerminator; + } } } @@ -869,6 +882,34 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { if (!MO.isReg() || !MO.isImplicit()) report("Expected implicit register after groups", &MO, OpNo); } + + if (MI->getOpcode() == TargetOpcode::INLINEASM_BR) { + const MachineBasicBlock *MBB = MI->getParent(); + + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (!MO.isMBB()) + continue; + + // Check the successor & predecessor lists look ok, assume they are + // not. Find the indirect target without going through the successors. + const MachineBasicBlock *IndirectTargetMBB = MO.getMBB(); + if (!IndirectTargetMBB) { + report("INLINEASM_BR indirect target does not exist", &MO, i); + break; + } + + if (!MBB->isSuccessor(IndirectTargetMBB)) + report("INLINEASM_BR indirect target missing from successor list", &MO, + i); + + if (!IndirectTargetMBB->isPredecessor(MBB)) + report("INLINEASM_BR indirect target predecessor list missing parent", + &MO, i); + } + } } bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI, @@ -937,11 +978,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { SmallVector<LLT, 4> Types; for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps); I != E; ++I) { - if (!MCID.OpInfo[I].isGenericType()) + if (!MCID.operands()[I].isGenericType()) continue; // Generic instructions specify type equality constraints between some of // their operands. Make sure these are consistent. 
- size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex(); + size_t TypeIdx = MCID.operands()[I].getGenericTypeIndex(); Types.resize(std::max(TypeIdx + 1, Types.size())); const MachineOperand *MO = &MI->getOperand(I); @@ -969,7 +1010,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // Generic opcodes must not have physical register operands. for (unsigned I = 0; I < MI->getNumOperands(); ++I) { const MachineOperand *MO = &MI->getOperand(I); - if (MO->isReg() && Register::isPhysicalRegister(MO->getReg())) + if (MO->isReg() && MO->getReg().isPhysical()) report("Generic instruction cannot have physical register", MO, I); } @@ -1274,17 +1315,38 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } case TargetOpcode::G_UNMERGE_VALUES: { + unsigned NumDsts = MI->getNumOperands() - 1; LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); - LLT SrcTy = MRI->getType(MI->getOperand(MI->getNumOperands()-1).getReg()); - // For now G_UNMERGE can split vectors. - for (unsigned i = 0; i < MI->getNumOperands()-1; ++i) { - if (MRI->getType(MI->getOperand(i).getReg()) != DstTy) + for (unsigned i = 1; i < NumDsts; ++i) { + if (MRI->getType(MI->getOperand(i).getReg()) != DstTy) { report("G_UNMERGE_VALUES destination types do not match", MI); + break; + } } - if (SrcTy.getSizeInBits() != - (DstTy.getSizeInBits() * (MI->getNumOperands() - 1))) { - report("G_UNMERGE_VALUES source operand does not cover dest operands", - MI); + + LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg()); + if (DstTy.isVector()) { + // This case is the converse of G_CONCAT_VECTORS. + if (!SrcTy.isVector() || SrcTy.getScalarType() != DstTy.getScalarType() || + SrcTy.getNumElements() != NumDsts * DstTy.getNumElements()) + report("G_UNMERGE_VALUES source operand does not match vector " + "destination operands", + MI); + } else if (SrcTy.isVector()) { + // This case is the converse of G_BUILD_VECTOR, but relaxed to allow + // mismatched types as long as the total size matches: + // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<4 x s32>) + if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) + report("G_UNMERGE_VALUES vector source operand does not match scalar " + "destination operands", + MI); + } else { + // This case is the converse of G_MERGE_VALUES. 
+ if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) { + report("G_UNMERGE_VALUES scalar source operand does not match scalar " + "destination operands", + MI); + } } break; } @@ -1438,10 +1500,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC; unsigned IntrID = IntrIDOp.getIntrinsicID(); if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) { - AttributeList Attrs - = Intrinsic::getAttributes(MF->getFunction().getContext(), - static_cast<Intrinsic::ID>(IntrID)); - bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone); + AttributeList Attrs = Intrinsic::getAttributes( + MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID)); + bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory(); if (NoSideEffects && DeclHasSideEffects) { report("G_INTRINSIC used with intrinsic that accesses memory", MI); break; @@ -1678,16 +1739,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("Incorrect floating-point class set (operand 2)", MI); break; } - const MachineOperand &SemanticsMO = MI->getOperand(3); - if (!SemanticsMO.isImm()) { - report("floating-point semantics (operand 3) must be an immediate", MI); - break; - } - int64_t Semantics = SemanticsMO.getImm(); - if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) { - report("Incorrect floating-point semantics (operand 3)", MI); - break; - } + break; + } + case TargetOpcode::G_ASSERT_ALIGN: { + if (MI->getOperand(2).getImm() < 1) + report("alignment immediate must be >= 1", MI); break; } default: @@ -1888,6 +1944,36 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { break; } } break; + case TargetOpcode::REG_SEQUENCE: { + unsigned NumOps = MI->getNumOperands(); + if (!(NumOps & 1)) { + report("Invalid number of operands for REG_SEQUENCE", MI); + break; + } + + for (unsigned I = 1; I != NumOps; I += 2) { + const MachineOperand &RegOp = MI->getOperand(I); + const MachineOperand &SubRegOp = MI->getOperand(I + 1); + + if (!RegOp.isReg()) + report("Invalid register operand for REG_SEQUENCE", &RegOp, I); + + if (!SubRegOp.isImm() || SubRegOp.getImm() == 0 || + SubRegOp.getImm() >= TRI->getNumSubRegIndices()) { + report("Invalid subregister index operand for REG_SEQUENCE", + &SubRegOp, I + 1); + } + } + + Register DstReg = MI->getOperand(0).getReg(); + if (DstReg.isPhysical()) + report("REG_SEQUENCE does not support physical register results", MI); + + if (MI->getOperand(0).getSubReg()) + report("Invalid subreg result for REG_SEQUENCE", MI); + + break; + } } } @@ -1901,7 +1987,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // The first MCID.NumDefs operands must be explicit register defines if (MONum < NumDefs) { - const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; + const MCOperandInfo &MCOI = MCID.operands()[MONum]; if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); else if (!MO->isDef() && !MCOI.isOptionalDef()) @@ -1909,7 +1995,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); } else if (MONum < MCID.getNumOperands()) { - const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; + const MCOperandInfo &MCOI = MCID.operands()[MONum]; // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. 
Check non-variadic operands only. bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1; @@ -1941,11 +2027,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); - else if (Register::isPhysicalRegister(MO->getReg())) { + else if (MO->getReg().isPhysical()) { const MachineOperand &MOTied = MI->getOperand(TiedTo); if (!MOTied.isReg()) report("Tied counterpart must be a register", &MOTied, TiedTo); - else if (Register::isPhysicalRegister(MOTied.getReg()) && + else if (MOTied.getReg().isPhysical() && MO->getReg() != MOTied.getReg()) report("Tied physical registers must match.", &MOTied, TiedTo); } @@ -2017,7 +2103,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check register classes. unsigned SubIdx = MO->getSubReg(); - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { if (SubIdx) { report("Illegal subregister index for physical register", MO, MONum); return; @@ -2255,8 +2341,18 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, bool SubRangeCheck, LaneBitmask LaneMask) { if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) { - assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx) { + // The LR can correspond to the whole reg and its def slot is not obliged + // to be the same as the MO' def slot. E.g. when we check here "normal" + // subreg MO but there is other EC subreg MO in the same instruction so the + // whole reg has EC def slot and differs from the currently checked MO' def + // slot. For example: + // %0 [16e,32r:0) 0@16e L..3 [16e,32r:0) 0@16e L..C [16r,32r:0) 0@16r + // Check that there is an early-clobber def of the same superregister + // somewhere is performed in visitMachineFunctionAfter() + if (((SubRangeCheck || MO->getSubReg() == 0) && VNI->def != DefIdx) || + !SlotIndex::isSameInstr(VNI->def, DefIdx) || + (VNI->def != DefIdx && + (!VNI->def.isEarlyClobber() || !DefIdx.isRegister()))) { report("Inconsistent valno->def", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); @@ -2277,8 +2373,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, if (MO->isDead()) { LiveQueryResult LRQ = LR.Query(DefIdx); if (!LRQ.isDeadDef()) { - assert(Register::isVirtualRegister(VRegOrUnit) && - "Expecting a virtual register."); + assert(VRegOrUnit.isVirtual() && "Expecting a virtual register."); // A dead subreg def only tells us that the specific subreg is dead. There // could be other non-dead defs of other subregs, or we could have other // parts of the register being live through the instruction. 
So unless we @@ -2688,7 +2783,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { MODef.isEarlyClobber() || MODef.isDebug()) report("Unexpected flag on PHI operand", &MODef, 0); Register DefReg = MODef.getReg(); - if (!Register::isVirtualRegister(DefReg)) + if (!DefReg.isVirtual()) report("Expected first PHI operand to be a virtual register", &MODef, 0); for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { @@ -2920,12 +3015,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || !MOI->isDef()) continue; - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { if (MOI->getReg() != Reg) continue; } else { - if (!Register::isPhysicalRegister(MOI->getReg()) || - !TRI->hasRegUnit(MOI->getReg(), Reg)) + if (!MOI->getReg().isPhysical() || !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } if (LaneMask.any() && @@ -3007,8 +3101,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, return; // RegUnit intervals are allowed dead phis. - if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() && - S.start == VNI->def && S.end == VNI->def.getDeadSlot()) + if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def && + S.end == VNI->def.getDeadSlot()) return; // The live segment is ending inside EndMBB @@ -3055,7 +3149,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; @@ -3128,7 +3222,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, while (true) { assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. - if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) { + if (!Reg.isVirtual() && MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -3196,7 +3290,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg, void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { Register Reg = LI.reg(); - assert(Register::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); verifyLiveRange(LI, Reg); LaneBitmask Mask; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index 581168b31384..af9fef0720f9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -116,6 +116,12 @@ void ModuloScheduleExpander::generatePipelinedLoop() { // a map between register names in the original block and the names created // in each stage of the pipelined loop. ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + + // The renaming destination by Phis for the registers across stages. + // This map is updated during Phis generation to point to the most recent + // renaming destination. 
+ ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; SmallVector<MachineBasicBlock *, 4> PrologBBs; @@ -151,14 +157,15 @@ void ModuloScheduleExpander::generatePipelinedLoop() { generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, - MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi, + InstrMap, MaxStageCount, MaxStageCount, false); LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); SmallVector<MachineBasicBlock *, 4> EpilogBBs; // Generate the epilog instructions to complete the pipeline. - generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs); + generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs, + PrologBBs); // We need this step because the register allocation doesn't handle some // situations well, so we insert copies to help out. @@ -171,6 +178,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() { addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); delete[] VRMap; + delete[] VRMapPhi; } void ModuloScheduleExpander::cleanup() { @@ -242,7 +250,8 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage, /// block for each stage that needs to complete. void ModuloScheduleExpander::generateEpilog( unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, - ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) { + ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { // We need to change the branch from the kernel to the first epilog block, so // this call to analyze branch uses the kernel rather than the original BB. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -296,8 +305,8 @@ void ModuloScheduleExpander::generateEpilog( } generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, - LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi, + InstrMap, LastStage, EpilogStage, i == 1); PredBB = NewBB; LLVM_DEBUG({ @@ -593,8 +602,9 @@ void ModuloScheduleExpander::generateExistingPhis( /// use in the pipelined sequence. void ModuloScheduleExpander::generatePhis( MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, - unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { // Compute the stage number that contains the initial Phi value, and // the Phi from the previous stage. 
unsigned PrologStage = 0; @@ -614,8 +624,7 @@ void ModuloScheduleExpander::generatePhis( BBI != BBE; ++BBI) { for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { MachineOperand &MO = BBI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || - !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; int StageScheduled = Schedule.getStage(&*BBI); @@ -631,26 +640,49 @@ void ModuloScheduleExpander::generatePhis( if (!InKernel && (unsigned)StageScheduled > PrologStage) continue; - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + unsigned PhiOp2; + if (InKernel) { + PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + } // The number of Phis can't exceed the number of prolog stages. The // prolog stage number is zero based. if (NumPhis > PrologStage + 1 - StageScheduled) NumPhis = PrologStage + 1 - StageScheduled; for (unsigned np = 0; np < NumPhis; ++np) { + // Example for + // Org: + // %Org = ... (Scheduled at Stage#0, NumPhi = 2) + // + // Prolog0 (Stage0): + // %Clone0 = ... + // Prolog1 (Stage1): + // %Clone1 = ... + // Kernel (Stage2): + // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel + // %Clone2 = ... + // Epilog0 (Stage3): + // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel + // Epilog1 (Stage4): + // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0 + // + // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2} + // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0} + // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2} + unsigned PhiOp1 = VRMap[PrologStage][Def]; if (np <= PrologStage) PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + if (!InKernel) { + if (PrevStage == LastStageNum && np == 0) + PhiOp2 = VRMap[LastStageNum][Def]; + else + PhiOp2 = VRMapPhi[PrevStage - np][Def]; } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; const TargetRegisterClass *RC = MRI.getRegClass(Def); Register NewReg = MRI.createVirtualRegister(RC); @@ -672,9 +704,9 @@ void ModuloScheduleExpander::generatePhis( NewReg); PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; + VRMapPhi[PrevStage - np - 1][Def] = NewReg; } else { - VRMap[CurStageNum - np][Def] = NewReg; + VRMapPhi[CurStageNum - np][Def] = NewReg; if (np == NumPhis - 1) rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, NewReg); @@ -716,7 +748,7 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, continue; Register reg = MO.getReg(); // Assume physical registers are used, unless they are marked dead. 
- if (Register::isPhysicalRegister(reg)) { + if (reg.isPhysical()) { used = !MO.isDead(); if (used) break; @@ -847,7 +879,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, MachineBasicBlock *Epilog = EpilogBBs[i]; SmallVector<MachineOperand, 4> Cond; - Optional<bool> StaticallyGreater = + std::optional<bool> StaticallyGreater = LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); unsigned numAdded = 0; if (!StaticallyGreater) { @@ -965,17 +997,6 @@ MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) { MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - // Check for tied operands in inline asm instructions. This should be handled - // elsewhere, but I'm not sure of the best solution. - if (OldMI->isInlineAsm()) - for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { - const auto &MO = OldMI->getOperand(i); - if (MO.isReg() && MO.isUse()) - break; - unsigned UseIdx; - if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) - NewMI->tieOperands(i, UseIdx); - } updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); return NewMI; } @@ -1010,7 +1031,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, unsigned InstrStageNum, ValueMapTy *VRMap) { for (MachineOperand &MO : NewMI->operands()) { - if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; Register reg = MO.getReg(); if (MO.isDef()) { @@ -1259,7 +1280,7 @@ class KernelRewriter { // Insert a phi that carries LoopReg from the loop body and InitReg otherwise. // If InitReg is not given it is chosen arbitrarily. It will either be undef // or will be chosen so as to share another phi. - Register phi(Register LoopReg, Optional<Register> InitReg = {}, + Register phi(Register LoopReg, std::optional<Register> InitReg = {}, const TargetRegisterClass *RC = nullptr); // Create an undef register of the given register class. Register undef(const TargetRegisterClass *RC); @@ -1367,7 +1388,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { // First, dive through the phi chain to find the defaults for the generated // phis. - SmallVector<Optional<Register>, 4> Defaults; + SmallVector<std::optional<Register>, 4> Defaults; Register LoopReg = Reg; auto LoopProducer = Producer; while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) { @@ -1378,7 +1399,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { } int LoopProducerStage = S.getStage(LoopProducer); - Optional<Register> IllegalPhiDefault; + std::optional<Register> IllegalPhiDefault; if (LoopProducerStage == -1) { // Do nothing. @@ -1410,9 +1431,9 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { // If we need more phis than we have defaults for, pad out with undefs for // the earliest phis, which are at the end of the defaults chain (the // chain is in reverse order). - Defaults.resize(Defaults.size() + StageDiff, Defaults.empty() - ? Optional<Register>() - : Defaults.back()); + Defaults.resize(Defaults.size() + StageDiff, + Defaults.empty() ? 
std::optional<Register>() + : Defaults.back()); } } @@ -1444,11 +1465,11 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { return LoopReg; } -Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, +Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg, const TargetRegisterClass *RC) { // If the init register is not undef, try and find an existing phi. if (InitReg) { - auto I = Phis.find({LoopReg, InitReg.value()}); + auto I = Phis.find({LoopReg, *InitReg}); if (I != Phis.end()) return I->second; } else { @@ -1469,10 +1490,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, return R; // Found a phi taking undef as input, so rewrite it to take InitReg. MachineInstr *MI = MRI.getVRegDef(R); - MI->getOperand(1).setReg(InitReg.value()); - Phis.insert({{LoopReg, InitReg.value()}, R}); + MI->getOperand(1).setReg(*InitReg); + Phis.insert({{LoopReg, *InitReg}, R}); const TargetRegisterClass *ConstrainRegClass = - MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value())); + MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); assert(ConstrainRegClass && "Expected a valid constrained register class!"); (void)ConstrainRegClass; UndefPhis.erase(I); @@ -1941,7 +1962,7 @@ void PeelingModuloScheduleExpander::fixupBranches() { MachineBasicBlock *Epilog = *EI; SmallVector<MachineOperand, 4> Cond; TII->removeBranch(*Prolog); - Optional<bool> StaticallyGreater = + std::optional<bool> StaticallyGreater = LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond); if (!StaticallyGreater) { LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp index d5d262e4047a..e68a6398cf51 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -117,7 +117,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, // Skip over register-to-register moves. if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() && !SrcMI->getOperand(1).getSubReg() && - Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) { + SrcMI->getOperand(1).getReg().isVirtual()) { SrcReg = SrcMI->getOperand(1).getReg(); SrcMI = MRI->getVRegDef(SrcReg); } @@ -142,8 +142,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); Register DstReg = MI->getOperand(0).getReg(); - assert(Register::isVirtualRegister(DstReg) && - "PHI destination is not a virtual register"); + assert(DstReg.isVirtual() && "PHI destination is not a virtual register"); // See if we already saw this register. if (!PHIsInCycle.insert(MI).second) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp index 7709095cd683..51035d2e442f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp @@ -161,7 +161,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { for (unsigned Index = 0, e = MRI->getNumVirtRegs(); Index != e; ++Index) { // Set the bit for this register for each MBB where it is // live-through or live-in (killed). 
- unsigned VirtReg = Register::index2VirtReg(Index); + Register VirtReg = Register::index2VirtReg(Index); MachineInstr *DefMI = MRI->getVRegDef(VirtReg); if (!DefMI) continue; @@ -441,7 +441,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || isImplicitlyDefined(SrcReg, *MRI); - assert(Register::isVirtualRegister(SrcReg) && + assert(SrcReg.isVirtual() && "Machine PHI Operands must all be virtual registers!"); // Get the MachineBasicBlock equivalent of the BasicBlock that is the source diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp index 0f9da0637ced..9449f143366f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp @@ -37,23 +37,6 @@ struct PatchableFunction : public MachineFunctionPass { }; } -/// Returns true if instruction \p MI will not result in actual machine code -/// instructions. -static bool doesNotGeneratecode(const MachineInstr &MI) { - // TODO: Introduce an MCInstrDesc flag for this - switch (MI.getOpcode()) { - default: return false; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: - case TargetOpcode::DBG_VALUE: - case TargetOpcode::DBG_LABEL: - return true; - } -} - bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { if (MF.getFunction().hasFnAttribute("patchable-function-entry")) { MachineBasicBlock &FirstMBB = *MF.begin(); @@ -74,11 +57,28 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { #endif auto &FirstMBB = *MF.begin(); - MachineBasicBlock::iterator FirstActualI = FirstMBB.begin(); - for (; doesNotGeneratecode(*FirstActualI); ++FirstActualI) - assert(FirstActualI != FirstMBB.end()); - auto *TII = MF.getSubtarget().getInstrInfo(); + + MachineBasicBlock::iterator FirstActualI = llvm::find_if( + FirstMBB, [](const MachineInstr &MI) { return !MI.isMetaInstruction(); }); + + if (FirstActualI == FirstMBB.end()) { + // As of Microsoft documentation on /hotpatch feature, we must ensure that + // "the first instruction of each function is at least two bytes, and no + // jump within the function goes to the first instruction" + + // When the first MBB is empty, insert a patchable no-op. This ensures the + // first instruction is patchable in two special cases: + // - the function is empty (e.g. unreachable) + // - the function jumps back to the first instruction, which is in a + // successor MBB. 
+ BuildMI(&FirstMBB, DebugLoc(), TII->get(TargetOpcode::PATCHABLE_OP)) + .addImm(2) + .addImm(TargetOpcode::PATCHABLE_OP); + MF.ensureAlignment(Align(16)); + return true; + } + auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(), TII->get(TargetOpcode::PATCHABLE_OP)) .addImm(2) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 31e37c4cd7e3..c3458be0f883 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -66,7 +66,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -273,11 +272,11 @@ namespace { : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} MachineInstr *getMI() const { return MI; } - Optional<IndexPair> getCommutePair() const { return CommutePair; } + std::optional<IndexPair> getCommutePair() const { return CommutePair; } private: MachineInstr *MI; - Optional<IndexPair> CommutePair; + std::optional<IndexPair> CommutePair; }; /// Helper class to hold a reply for ValueTracker queries. @@ -696,7 +695,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, do { CurSrcPair = SrcToLook.pop_back_val(); // As explained above, do not handle physical registers - if (Register::isPhysicalRegister(CurSrcPair.Reg)) + if (CurSrcPair.Reg.isPhysical()) return false; ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); @@ -744,7 +743,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // constraints to the register allocator. Moreover, if we want to extend // the live-range of a physical register, unlike SSA virtual register, // we will have to check that they aren't redefine before the related use. - if (Register::isPhysicalRegister(CurSrcPair.Reg)) + if (CurSrcPair.Reg.isPhysical()) return false; // Keep following the chain if the value isn't any better yet. @@ -1191,7 +1190,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { "Coalescer can understand multiple defs?!"); const MachineOperand &MODef = MI.getOperand(0); // Do not rewrite physical definitions. - if (Register::isPhysicalRegister(MODef.getReg())) + if (MODef.getReg().isPhysical()) return false; bool Changed = false; @@ -1242,8 +1241,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { MachineInstr & PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, RewriteMapTy &RewriteMap) { - assert(!Register::isPhysicalRegister(Def.Reg) && - "We do not rewrite physical registers"); + assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers"); // Find the new source to use in the COPY rewrite. RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap); @@ -1301,7 +1299,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( while (CpyRewriter.getNextRewritableSource(Src, Def)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. 
- if (Register::isPhysicalRegister(Def.Reg)) + if (Def.Reg.isPhysical()) return false; // If we do not know how to rewrite this definition, there is no point @@ -1460,7 +1458,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) { + if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) { // %vreg = COPY $physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. @@ -2110,7 +2108,7 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. - if (!Register::isPhysicalRegister(Reg) && OneRegSrc) { + if (!Reg.isPhysical() && OneRegSrc) { MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); if (DI != MRI.def_end()) { Def = DI->getParent(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 54bb4a31ef49..7e46dd35ce47 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -82,7 +82,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "Processing " << *MI); Register Reg = MI->getOperand(0).getReg(); - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { // For virtual registers, mark all uses as <undef>, and convert users to // implicit-def when possible. for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { @@ -108,8 +108,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { if (!MO.isReg()) continue; Register UserReg = MO.getReg(); - if (!Register::isPhysicalRegister(UserReg) || - !TRI->regsOverlap(Reg, UserReg)) + if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg)) continue; // UserMI uses or redefines Reg. Set <undef> flags on all uses. Found = true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index a8d40edd88d3..cc70ec477650 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -57,6 +57,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -127,6 +128,17 @@ private: void replaceFrameIndices(MachineFunction &MF); void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj); + // Frame indices in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. + bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, + unsigned OpIdx, int SPAdj = 0); + // Does same as replaceFrameIndices but using the backward MIR walk and + // backward register scavenger walk. Does not yet support call sequence + // processing. 
+ void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF, + int &SPAdj); + void insertPrologEpilogCode(MachineFunction &MF); void insertZeroCallUsedRegs(MachineFunction &MF); }; @@ -283,13 +295,35 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { assert(!Failed && "Invalid warn-stack-size fn attr value"); (void)Failed; } - if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) { - StackSize += MFI.getUnsafeStackSize(); - } + uint64_t UnsafeStackSize = MFI.getUnsafeStackSize(); + if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) + StackSize += UnsafeStackSize; + if (StackSize > Threshold) { DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning); F.getContext().diagnose(DiagStackSize); + int64_t SpillSize = 0; + for (int Idx = MFI.getObjectIndexBegin(), End = MFI.getObjectIndexEnd(); + Idx != End; ++Idx) { + if (MFI.isSpillSlotObjectIndex(Idx)) + SpillSize += MFI.getObjectSize(Idx); + } + + float SpillPct = + static_cast<float>(SpillSize) / static_cast<float>(StackSize); + float VarPct = 1.0f - SpillPct; + int64_t VariableSize = StackSize - SpillSize; + dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables", + SpillSize, StackSize, VariableSize, SpillPct, VarPct); + if (UnsafeStackSize != 0) { + float UnsafePct = + static_cast<float>(UnsafeStackSize) / static_cast<float>(StackSize); + dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack", UnsafeStackSize, + UnsafePct, StackSize); + } + dbgs() << "\n"; } + ORE->emit([&]() { return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", MF.getFunction().getSubprogram(), @@ -575,7 +609,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, - TRI); + TRI, Register()); } } } @@ -601,7 +635,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, .addReg(CI.getDstReg(), getKillRegState(true)); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, + TRI, Register()); assert(I != RestoreBlock.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -1195,7 +1230,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { BitVector UsedRegs(TRI.getNumRegs()); if (OnlyUsed) for (const MachineBasicBlock &MBB : MF) - for (const MachineInstr &MI : MBB) + for (const MachineInstr &MI : MBB) { + // skip debug instructions + if (MI.isDebugInstr()) + continue; + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -1205,6 +1244,12 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { (MO.isDef() || MO.isUse())) UsedRegs.set(Reg); } + } + + // Get a list of registers that are used. + BitVector LiveIns(TRI.getNumRegs()); + for (const MachineBasicBlock::RegisterMaskPair &LI : MF.front().liveins()) + LiveIns.set(LI.PhysReg); BitVector RegsToZero(TRI.getNumRegs()); for (MCRegister Reg : AllocatableSet.set_bits()) { @@ -1221,8 +1266,14 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { continue; // Want only registers used for arguments. 
- if (OnlyArg && !TRI.isArgumentRegister(MF, Reg)) - continue; + if (OnlyArg) { + if (OnlyUsed) { + if (!LiveIns[Reg]) + continue; + } else if (!TRI.isArgumentRegister(MF, Reg)) { + continue; + } + } RegsToZero.set(Reg); } @@ -1325,6 +1376,154 @@ void PEI::replaceFrameIndices(MachineFunction &MF) { } } +bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, + unsigned OpIdx, int SPAdj) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + if (MI.isDebugValue()) { + + MachineOperand &Op = MI.getOperand(OpIdx); + assert(MI.isDebugOperand(&Op) && + "Frame indices can only appear as a debug operand in a DBG_VALUE*" + " machine instruction"); + Register Reg; + unsigned FrameIdx = Op.getIndex(); + unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx); + + StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg); + Op.ChangeToRegister(Reg, false /*isDef*/); + + const DIExpression *DIExpr = MI.getDebugExpression(); + + // If we have a direct DBG_VALUE, and its location expression isn't + // currently complex, then adding an offset will morph it into a + // complex location that is interpreted as being a memory address. + // This changes a pointer-valued variable to dereference that pointer, + // which is incorrect. Fix by adding DW_OP_stack_value. + + if (MI.isNonListDebugValue()) { + unsigned PrependFlags = DIExpression::ApplyOffset; + if (!MI.isIndirectDebugValue() && !DIExpr->isComplex()) + PrependFlags |= DIExpression::StackValue; + + // If we have DBG_VALUE that is indirect and has a Implicit location + // expression need to insert a deref before prepending a Memory + // location expression. Also after doing this we change the DBG_VALUE + // to be direct. + if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) { + SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; + bool WithStackValue = true; + DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); + // Make the DBG_VALUE direct. + MI.getDebugOffset().ChangeToRegister(0, false); + } + DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset); + } else { + // The debug operand at DebugOpIndex was a frame index at offset + // `Offset`; now the operand has been replaced with the frame + // register, we must add Offset with `register x, plus Offset`. + unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op); + SmallVector<uint64_t, 3> Ops; + TRI.getOffsetOpcodes(Offset, Ops); + DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex); + } + MI.getDebugExpressionOp().setMetadata(DIExpr); + return true; + } + + if (MI.isDebugPHI()) { + // Allow stack ref to continue onwards. + return true; + } + + // TODO: This code should be commoned with the code for + // PATCHPOINT. There's no good reason for the difference in + // implementation other than historical accident. The only + // remaining difference is the unconditional use of the stack + // pointer as the base register. 
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT) { + assert((!MI.isDebugValue() || OpIdx == 0) && + "Frame indicies can only appear as the first operand of a " + "DBG_VALUE machine instruction"); + Register Reg; + MachineOperand &Offset = MI.getOperand(OpIdx + 1); + StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( + MF, MI.getOperand(OpIdx).getIndex(), Reg, /*IgnoreSPUpdates*/ false); + assert(!refOffset.getScalable() && + "Frame offsets with a scalable component are not supported"); + Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); + MI.getOperand(OpIdx).ChangeToRegister(Reg, false /*isDef*/); + return true; + } + return false; +} + +void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB, + MachineFunction &MF, int &SPAdj) { + assert(MF.getSubtarget().getRegisterInfo() && + "getRegisterInfo() must be implemented!"); + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + RS->enterBasicBlockEnd(*BB); + + for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) { + + // Register scavenger backward step + MachineBasicBlock::iterator Step(MI); + for (unsigned i = 0; i != MI.getNumOperands(); ++i) { + if (!MI.getOperand(i).isFI()) + continue; + + if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj)) + continue; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + + // TRI.eliminateFrameIndex may lower the frame index to a sequence of + // instructions. It also can remove/change instructions passed by the + // iterator and invalidate the iterator. We have to take care of this. For + // that we support two iterators: *Step* - points to the position up to + // which the scavenger should scan by the next iteration to have liveness + // information up to date. *Curr* - keeps track of the correct RS->MBBI - + // the scan start point. It points to the currently processed instruction + // right before the frame lowering. + // + // ITERATORS WORK AS FOLLOWS: + // *Step* is shifted one step back right before the frame lowering and + // one step forward right after it. No matter how many instructions were + // inserted, *Step* will be right after the position which is going to be + // processed in the next iteration, thus, in the correct position for the + // scavenger to go up to. + // *Curr* is shifted one step forward right before calling + // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure + // it points right to the position that is the correct starting point for + // the scavenger to scan. + MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition(); + + // Shift back + --Step; + + bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS); + // Restore to unify logic with a shift back that happens in the end of + // the outer loop. + ++Step; + RS->skipTo(--Curr); + if (Removed) + break; + } + + // Shift it to make RS collect reg info up to the current instruction. + if (Step != BB->begin()) + Step--; + + // Update register states. 
+ RS->backward(Step); + } +} + void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj) { assert(MF.getSubtarget().getRegisterInfo() && @@ -1333,6 +1532,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + if (RS && TRI.supportsBackwardScavenger()) + return replaceFrameIndicesBackward(BB, MF, SPAdj); + if (RS && FrameIndexEliminationScavenging) RS->enterBasicBlock(*BB); @@ -1353,83 +1555,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, if (!MI.getOperand(i).isFI()) continue; - // Frame indices in debug values are encoded in a target independent - // way with simply the frame index and offset rather than any - // target-specific addressing mode. - if (MI.isDebugValue()) { - MachineOperand &Op = MI.getOperand(i); - assert( - MI.isDebugOperand(&Op) && - "Frame indices can only appear as a debug operand in a DBG_VALUE*" - " machine instruction"); - Register Reg; - unsigned FrameIdx = Op.getIndex(); - unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx); - - StackOffset Offset = - TFI->getFrameIndexReference(MF, FrameIdx, Reg); - Op.ChangeToRegister(Reg, false /*isDef*/); - - const DIExpression *DIExpr = MI.getDebugExpression(); - - // If we have a direct DBG_VALUE, and its location expression isn't - // currently complex, then adding an offset will morph it into a - // complex location that is interpreted as being a memory address. - // This changes a pointer-valued variable to dereference that pointer, - // which is incorrect. Fix by adding DW_OP_stack_value. - - if (MI.isNonListDebugValue()) { - unsigned PrependFlags = DIExpression::ApplyOffset; - if (!MI.isIndirectDebugValue() && !DIExpr->isComplex()) - PrependFlags |= DIExpression::StackValue; - - // If we have DBG_VALUE that is indirect and has a Implicit location - // expression need to insert a deref before prepending a Memory - // location expression. Also after doing this we change the DBG_VALUE - // to be direct. - if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) { - SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; - bool WithStackValue = true; - DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); - // Make the DBG_VALUE direct. - MI.getDebugOffset().ChangeToRegister(0, false); - } - DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset); - } else { - // The debug operand at DebugOpIndex was a frame index at offset - // `Offset`; now the operand has been replaced with the frame - // register, we must add Offset with `register x, plus Offset`. - unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op); - SmallVector<uint64_t, 3> Ops; - TRI.getOffsetOpcodes(Offset, Ops); - DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex); - } - MI.getDebugExpressionOp().setMetadata(DIExpr); + if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj)) continue; - } else if (MI.isDebugPHI()) { - // Allow stack ref to continue onwards. - continue; - } - - // TODO: This code should be commoned with the code for - // PATCHPOINT. There's no good reason for the difference in - // implementation other than historical accident. The only - // remaining difference is the unconditional use of the stack - // pointer as the base register. 
- if (MI.getOpcode() == TargetOpcode::STATEPOINT) { - assert((!MI.isDebugValue() || i == 0) && - "Frame indicies can only appear as the first operand of a " - "DBG_VALUE machine instruction"); - Register Reg; - MachineOperand &Offset = MI.getOperand(i + 1); - StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( - MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); - assert(!refOffset.getScalable() && - "Frame offsets with a scalable component are not supported"); - Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); - MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/); - continue; - } // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp index 51de99b81057..dcb1a44c75e4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp @@ -105,8 +105,8 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, const DataFlowGraph &G) { - OS << Print<NodeId>(RA.Id, G) << '<' - << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>'; + OS << Print(RA.Id, G) << '<' + << Print(RA.Addr->getRegRef(G), G) << '>'; if (RA.Addr->getFlags() & NodeAttrs::Fixed) OS << '!'; } @@ -115,16 +115,16 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) { printRefHeader(OS, P.Obj, P.G); OS << '('; if (NodeId N = P.Obj.Addr->getReachingDef()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); OS << ','; if (NodeId N = P.Obj.Addr->getReachedDef()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); OS << ','; if (NodeId N = P.Obj.Addr->getReachedUse()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); OS << "):"; if (NodeId N = P.Obj.Addr->getSibling()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); return OS; } @@ -132,10 +132,10 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) { printRefHeader(OS, P.Obj, P.G); OS << '('; if (NodeId N = P.Obj.Addr->getReachingDef()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); OS << "):"; if (NodeId N = P.Obj.Addr->getSibling()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); return OS; } @@ -144,13 +144,13 @@ raw_ostream &operator<< (raw_ostream &OS, printRefHeader(OS, P.Obj, P.G); OS << '('; if (NodeId N = P.Obj.Addr->getReachingDef()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); OS << ','; if (NodeId N = P.Obj.Addr->getPredecessor()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); OS << "):"; if (NodeId N = P.Obj.Addr->getSibling()) - OS << Print<NodeId>(N, P.G); + OS << Print(N, P.G); return OS; } @@ -172,7 +172,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) { raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) { unsigned N = P.Obj.size(); for (auto I : P.Obj) { - OS << Print<NodeId>(I.Id, P.G); + OS << Print(I.Id, P.G); if (--N) OS << ' '; } @@ -182,7 +182,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) { raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) { unsigned N = P.Obj.size(); for (auto I : P.Obj) { - OS << Print<NodeId>(I, P.G); + OS << Print(I, P.G); if (--N) OS << ' '; } @@ -214,7 +214,7 @@ namespace { } // end anonymous namespace raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { - OS 
<< Print<NodeId>(P.Obj.Id, P.G) << ": phi [" + OS << Print(P.Obj.Id, P.G) << ": phi [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; return OS; } @@ -222,7 +222,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) { const MachineInstr &MI = *P.Obj.Addr->getCode(); unsigned Opc = MI.getOpcode(); - OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc); + OS << Print(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc); // Print the target for calls and branches (for readability). if (MI.isCall() || MI.isBranch()) { MachineInstr::const_mop_iterator T = @@ -254,7 +254,7 @@ raw_ostream &operator<< (raw_ostream &OS, OS << PrintNode<StmtNode*>(P.Obj, P.G); break; default: - OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G); + OS << "instr? " << Print(P.Obj.Id, P.G); break; } return OS; @@ -274,7 +274,7 @@ raw_ostream &operator<< (raw_ostream &OS, } }; - OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB) + OS << Print(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB) << " --- preds(" << NP << "): "; for (MachineBasicBlock *B : BB->predecessors()) Ns.push_back(B->getNumber()); @@ -294,7 +294,7 @@ raw_ostream &operator<< (raw_ostream &OS, } raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) { - OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: " + OS << "DFG dump:[\n" << Print(P.Obj.Id, P.G) << ": Function: " << P.Obj.Addr->getCode()->getName() << '\n'; for (auto I : P.Obj.Addr->members(P.G)) OS << PrintNode<BlockNode*>(I, P.G) << '\n'; @@ -305,7 +305,7 @@ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) { raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) { OS << '{'; for (auto I : P.Obj) - OS << ' ' << Print<RegisterRef>(I, P.G); + OS << ' ' << Print(I, P.G); OS << " }"; return OS; } @@ -318,8 +318,8 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) { raw_ostream &operator<< (raw_ostream &OS, const Print<DataFlowGraph::DefStack> &P) { for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) { - OS << Print<NodeId>(I->Id, P.G) - << '<' << Print<RegisterRef>(I->Addr->getRegRef(P.G), P.G) << '>'; + OS << Print(I->Id, P.G) + << '<' << Print(I->Addr->getRegRef(P.G), P.G) << '>'; I.down(); if (I != E) OS << ' '; @@ -623,7 +623,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) return true; const MCInstrDesc &D = In.getDesc(); - if (!D.getImplicitDefs() && !D.getImplicitUses()) + if (D.implicit_defs().empty() && D.implicit_uses().empty()) return false; const MachineOperand &Op = In.getOperand(OpNum); // If there is a sub-register, treat the operand as non-fixed. Currently, @@ -632,14 +632,9 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) if (Op.getSubReg() != 0) return false; Register Reg = Op.getReg(); - const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs() - : D.getImplicitUses(); - if (!ImpR) - return false; - while (*ImpR) - if (*ImpR++ == Reg) - return true; - return false; + ArrayRef<MCPhysReg> ImpOps = + Op.isDef() ? 
D.implicit_defs() : D.implicit_uses(); + return is_contained(ImpOps, Reg); } // @@ -648,6 +643,14 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, + const MachineDominanceFrontier &mdf) + : DefaultTOI(std::make_unique<TargetOperandInfo>(tii)), MF(mf), TII(tii), + TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(*DefaultTOI), + LiveIns(PRI) { +} + +DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, + const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi) : MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi), LiveIns(PRI) { @@ -1087,7 +1090,7 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) { if (!Defined.insert(RR.Reg).second) { MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); dbgs() << "Multiple definitions of register: " - << Print<RegisterRef>(RR, *this) << " in\n " << *MI << "in " + << Print(RR, *this) << " in\n " << *MI << "in " << printMBBReference(*MI->getParent()) << '\n'; llvm_unreachable(nullptr); } @@ -1275,7 +1278,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) { if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; Register R = Op.getReg(); - if (!R || !Register::isPhysicalRegister(R)) + if (!R || !R.isPhysical()) continue; uint16_t Flags = NodeAttrs::None; if (TOI.isPreserving(In, OpN)) { @@ -1320,7 +1323,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) { if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) continue; Register R = Op.getReg(); - if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R)) + if (!R || !R.isPhysical() || DoneDefs.test(R)) continue; RegisterRef RR = makeRegRef(Op); uint16_t Flags = NodeAttrs::None; @@ -1349,7 +1352,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) { if (!Op.isReg() || !Op.isUse()) continue; Register R = Op.getReg(); - if (!R || !Register::isPhysicalRegister(R)) + if (!R || !R.isPhysical()) continue; uint16_t Flags = NodeAttrs::None; if (Op.isUndef()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp index d8eac20d16b6..902b29d41ce1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp @@ -64,7 +64,7 @@ namespace rdf { for (const auto &I : P.Obj) { OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{'; for (auto J = I.second.begin(), E = I.second.end(); J != E; ) { - OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second); + OS << Print(J->first, P.G) << PrintLaneMaskOpt(J->second); if (++J != E) OS << ','; } @@ -619,10 +619,9 @@ void Liveness::computePhiInfo() { if (Trace) { dbgs() << "Phi-up-to-phi map with intervening defs:\n"; for (auto I : PhiUp) { - dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {"; + dbgs() << "phi " << Print(I.first, DFG) << " -> {"; for (auto R : I.second) - dbgs() << ' ' << Print<NodeId>(R.first, DFG) - << Print<RegisterAggr>(R.second, DFG); + dbgs() << ' ' << Print(R.first, DFG) << Print(R.second, DFG); dbgs() << " }\n"; } } @@ -720,16 +719,16 @@ void Liveness::computePhiInfo() { if (Trace) { dbgs() << "Real use map:\n"; for (auto I : RealUseMap) { - dbgs() << "phi " << Print<NodeId>(I.first, DFG); + dbgs() << "phi " << 
Print(I.first, DFG); NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first); NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG); if (!Ds.empty()) { RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG); - dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>'; + dbgs() << '<' << Print(RR, DFG) << '>'; } else { dbgs() << "<noreg>"; } - dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; + dbgs() << " -> " << Print(I.second, DFG) << '\n'; } } } @@ -788,7 +787,7 @@ void Liveness::computeLiveIns() { dbgs() << "Phi live-on-entry map:\n"; for (auto &I : PhiLON) dbgs() << "block #" << I.first->getNumber() << " -> " - << Print<RefMap>(I.second, DFG) << '\n'; + << Print(I.second, DFG) << '\n'; } // Build the phi live-on-exit map. Each phi node has some set of reached @@ -851,7 +850,7 @@ void Liveness::computeLiveIns() { dbgs() << "Phi live-on-exit map:\n"; for (auto &I : PhiLOX) dbgs() << "block #" << I.first->getNumber() << " -> " - << Print<RefMap>(I.second, DFG) << '\n'; + << Print(I.second, DFG) << '\n'; } RefMap LiveIn; @@ -869,9 +868,9 @@ void Liveness::computeLiveIns() { llvm::sort(LV); dbgs() << printMBBReference(B) << "\t rec = {"; for (auto I : LV) - dbgs() << ' ' << Print<RegisterRef>(I, DFG); + dbgs() << ' ' << Print(I, DFG); dbgs() << " }\n"; - //dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n'; + //dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n'; LV.clear(); const RegisterAggr &LG = LiveMap[&B]; @@ -880,7 +879,7 @@ void Liveness::computeLiveIns() { llvm::sort(LV); dbgs() << "\tcomp = {"; for (auto I : LV) - dbgs() << ' ' << Print<RegisterRef>(I, DFG); + dbgs() << ' ' << Print(I, DFG); dbgs() << " }\n"; } @@ -942,7 +941,7 @@ void Liveness::resetKills(MachineBasicBlock *B) { if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; Register R = Op.getReg(); - if (!Register::isPhysicalRegister(R)) + if (!R.isPhysical()) continue; for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) Live.reset(*SR); @@ -951,7 +950,7 @@ void Liveness::resetKills(MachineBasicBlock *B) { if (!Op.isReg() || !Op.isUse() || Op.isUndef()) continue; Register R = Op.getReg(); - if (!Register::isPhysicalRegister(R)) + if (!R.isPhysical()) continue; bool IsLive = false; for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) { @@ -1018,8 +1017,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { for (auto *I : *N) dbgs() << ' ' << I->getBlock()->getNumber(); dbgs() << " }\n"; - dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; + dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n'; } // Add reaching defs of phi uses that are live on exit from this block. 
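The RDFGraph/RDFLiveness hunks above drop the explicit template arguments from the Print<> wrappers: with C++17 class template argument deduction (CTAD) the constructor call deduces the type, so Print(N, G) names the same specialization as Print<NodeId>(N, G). A minimal, self-contained sketch of that pattern follows; the Graph type and the operator<< here are illustrative stand-ins, not the real RDF classes.

#include <cstdint>
#include <iostream>

struct Graph {};

// Tiny stand-in for the RDF Print<T> wrapper: all template parameters are
// deducible from the constructor arguments, which is what enables CTAD.
template <typename T> struct Print {
  Print(const T &Obj, const Graph &G) : Obj(Obj), G(G) {}
  const T &Obj;
  const Graph &G;
};

template <typename T>
std::ostream &operator<<(std::ostream &OS, const Print<T> &P) {
  return OS << P.Obj;
}

int main() {
  Graph G;
  uint32_t Id = 42;                          // plays the role of a NodeId
  std::cout << Print<uint32_t>(Id, G) << ' ' // explicit arguments, old spelling
            << Print(Id, G) << '\n';         // deduced by CTAD, new spelling
}

The behaviour is unchanged by the cleanup; only the call-site spelling gets shorter.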
@@ -1029,8 +1028,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "after LOX\n"; - dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; + dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n'; } // The LiveIn map at this point has all defs that are live-on-exit from B, @@ -1113,8 +1112,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "after defs in block\n"; - dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; + dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n'; } // Scan the block for upward-exposed uses and add them to the tracking set. @@ -1134,8 +1133,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "after uses in block\n"; - dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; + dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n'; } // Phi uses should not be propagated up the dominator tree, since they @@ -1151,8 +1150,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "after phi uses in block\n"; - dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n'; + dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print(Local, DFG) << '\n'; } for (auto *C : IIDF[B]) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp index 990dd84c829d..900f0e9079d6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp @@ -156,7 +156,7 @@ void RegAllocBase::allocatePhysRegs() { continue; } LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(Register::isVirtualRegister(SplitVirtReg->reg()) && + assert(SplitVirtReg->reg().isVirtual() && "expect split value in virtual register"); enqueue(SplitVirtReg); ++NumNewQueued; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index ee03feda796f..b1743d3f987d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -95,7 +95,7 @@ template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() { Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false); break; case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development: -#if defined(LLVM_HAVE_TF_API) +#if defined(LLVM_HAVE_TFLITE) Ret = createDevelopmentModeAdvisor(); #endif break; @@ -210,7 +210,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( // Check if any interfering live range is heavier than MaxWeight. 
for (const LiveInterval *Intf : reverse(Interferences)) { - assert(Register::isVirtualRegister(Intf->reg()) && + assert(Intf->reg().isVirtual() && "Only expecting virtual register interference from query"); // Do not allow eviction of a virtual register if we are in the middle diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index d6a3997e4b70..46838570a2fc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -10,7 +10,6 @@ #define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" @@ -126,9 +125,9 @@ protected: // Get the upper limit of elements in the given Order we need to analize. // TODO: is this heuristic, we could consider learning it. - Optional<unsigned> getOrderLimit(const LiveInterval &VirtReg, - const AllocationOrder &Order, - unsigned CostPerUseLimit) const; + std::optional<unsigned> getOrderLimit(const LiveInterval &VirtReg, + const AllocationOrder &Order, + unsigned CostPerUseLimit) const; // Determine if it's worth trying to allocate this reg, given the // CostPerUseLimit @@ -177,6 +176,8 @@ public: virtual std::unique_ptr<RegAllocEvictionAdvisor> getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } + virtual void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref<float()> GetReward){}; protected: // This analysis preserves everything, and subclasses may have additional diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index cb552f212fbb..775e66e48406 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -281,6 +281,7 @@ namespace { Register traceCopies(Register VirtReg) const; Register traceCopyChain(Register Reg) const; + bool shouldAllocateRegister(const Register Reg) const; int getStackSpaceFor(Register VirtReg); void spill(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg AssignedReg, bool Kill, bool LiveOut); @@ -300,6 +301,12 @@ char RegAllocFast::ID = 0; INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) +bool RegAllocFast::shouldAllocateRegister(const Register Reg) const { + assert(Reg.isVirtual()); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); + return ShouldAllocateClass(*TRI, RC); +} + void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) RegUnitStates[*UI] = NewState; @@ -428,7 +435,8 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n'); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI, + VirtReg); ++NumStores; MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator(); @@ -485,7 +493,7 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, << printReg(PhysReg, TRI) << '\n'); int FI = getStackSpaceFor(VirtReg); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - 
TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI); + TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI, VirtReg); ++NumLoads; } @@ -841,7 +849,9 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { assert(MO.isUndef() && "expected undef use"); Register VirtReg = MO.getReg(); - assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg"); + assert(VirtReg.isVirtual() && "Expected virtreg"); + if (!shouldAllocateRegister(VirtReg)) + return; LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); MCPhysReg PhysReg; @@ -867,6 +877,8 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// (tied or earlyclobber) that may interfere with preassigned uses. void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg) { + if (!shouldAllocateRegister(VirtReg)) + return; LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) { MCPhysReg PrevReg = LRI->PhysReg; @@ -900,6 +912,8 @@ void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, bool LookAtPhysRegUses) { assert(VirtReg.isVirtual() && "Not a virtual register"); + if (!shouldAllocateRegister(VirtReg)) + return; MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; @@ -950,6 +964,8 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg) { assert(VirtReg.isVirtual() && "Not a virtual register"); + if (!shouldAllocateRegister(VirtReg)) + return; MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; @@ -974,8 +990,13 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, Register Hint; if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) { Hint = MI.getOperand(0).getReg(); - assert(Hint.isPhysical() && - "Copy destination should already be assigned"); + if (Hint.isVirtual()) { + assert(!shouldAllocateRegister(Hint)); + Hint = Register(); + } else { + assert(Hint.isPhysical() && + "Copy destination should already be assigned"); + } } allocVirtReg(MI, *LRI, Hint, false); if (LRI->Error) { @@ -1083,6 +1104,8 @@ void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); if (Reg.isVirtual()) { + if (!shouldAllocateRegister(Reg)) + return; const TargetRegisterClass *OpRC = MRI->getRegClass(Reg); for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); RCIdx != RCIdxEnd; ++RCIdx) { @@ -1142,6 +1165,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (MO.isReg()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { + if (!shouldAllocateRegister(Reg)) + continue; if (MO.isDef()) { HasDef = true; HasVRegDef = true; @@ -1205,7 +1230,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } if (MO.isDef()) { - if (Reg.isVirtual()) + if (Reg.isVirtual() && shouldAllocateRegister(Reg)) DefOperandIndexes.push_back(I); addRegClassDefCounts(RegClassDefCounts, Reg); @@ -1295,6 +1320,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { Register Reg = MO.getReg(); if (!Reg) continue; + if (Reg.isVirtual()) { + assert(!shouldAllocateRegister(Reg)); + continue; + } assert(Reg.isPhysical()); if (MRI->isReserved(Reg)) continue; @@ -1329,7 +1358,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if 
(MRI->isReserved(Reg)) continue; bool displacedAny = usePhysReg(MI, Reg); - if (!displacedAny && !MRI->isReserved(Reg)) + if (!displacedAny) MO.setIsKill(true); } } @@ -1341,7 +1370,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Reg.isVirtual()) + if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) continue; if (MO.isUndef()) { @@ -1368,7 +1397,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Reg.isVirtual()) + if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1381,16 +1410,15 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (MachineOperand &MO : llvm::reverse(MI.operands())) { if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) continue; - // subreg defs don't free the full register. We left the subreg number - // around as a marker in setPhysReg() to recognize this case here. - if (MO.getSubReg() != 0) { - MO.setSubReg(0); - continue; - } + assert(!MO.getSubReg() && "should be already handled in def processing"); Register Reg = MO.getReg(); if (!Reg) continue; + if (Reg.isVirtual()) { + assert(!shouldAllocateRegister(Reg)); + continue; + } assert(Reg.isPhysical() && "should have register assigned"); // We sometimes get odd situations like: @@ -1418,7 +1446,9 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { // Ignore DBG_VALUEs that aren't based on virtual registers. These are // mostly constants and frame indices. for (Register Reg : MI.getUsedDebugRegs()) { - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) + continue; + if (!shouldAllocateRegister(Reg)) continue; // Already spilled to a stackslot? 
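The RegAllocFast.cpp hunks above thread a new shouldAllocateRegister() predicate through every def, use, and debug-value path, so virtual registers whose class this allocator does not handle are skipped and left for a later pass. A minimal standalone sketch of that gating pattern, using made-up names (TinyAllocator, VirtReg) rather than the LLVM classes:

#include <functional>
#include <utility>
#include <vector>

// Hypothetical, simplified model of a register operand; not the LLVM API.
struct VirtReg {
  unsigned Id;
  unsigned ClassId; // which register class this virtual register belongs to
};

// A pass that only allocates some register classes consults a filter
// predicate and leaves every other virtual register untouched, mirroring
// the shouldAllocateRegister() guards added in the hunks above.
class TinyAllocator {
  std::function<bool(unsigned /*ClassId*/)> ShouldAllocateClass;

public:
  explicit TinyAllocator(std::function<bool(unsigned)> Filter)
      : ShouldAllocateClass(std::move(Filter)) {}

  bool shouldAllocate(const VirtReg &R) const {
    return ShouldAllocateClass(R.ClassId);
  }

  // Returns the registers this pass actually assigned; filtered-out
  // registers stay virtual and are deferred to a later allocation pass.
  std::vector<unsigned> run(const std::vector<VirtReg> &Regs) {
    std::vector<unsigned> Assigned;
    for (const VirtReg &R : Regs) {
      if (!shouldAllocate(R))
        continue; // leave it virtual; another pass owns this class
      Assigned.push_back(R.Id);
    }
    return Assigned;
  }
};

int main() {
  // Allocate only class 0 in this pass (say, scalar registers) and defer
  // class 1 (say, vector registers) to a later pass.
  TinyAllocator RA([](unsigned ClassId) { return ClassId == 0; });
  std::vector<VirtReg> Regs = {{1, 0}, {2, 1}, {3, 0}};
  auto Assigned = RA.run(Regs); // {1, 3}
  return Assigned.size() == 2 ? 0 : 1;
}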
@@ -1460,7 +1490,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) { continue; Register Reg = MO.getReg(); - if (!Reg.isVirtual()) + if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) continue; DenseMap<Register, MCPhysReg>::iterator DI; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index 9c6cb7c3a4e2..b43a4d2a4b85 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -17,12 +17,12 @@ #include "LiveDebugVariables.h" #include "RegAllocBase.h" #include "RegAllocEvictionAdvisor.h" +#include "RegAllocPriorityAdvisor.h" #include "SpillPlacement.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -163,6 +163,7 @@ INITIALIZE_PASS_DEPENDENCY(EdgeBundles) INITIALIZE_PASS_DEPENDENCY(SpillPlacement) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis) +INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis) INITIALIZE_PASS_END(RAGreedy, "greedy", "Greedy Register Allocator", false, false) @@ -219,6 +220,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<SpillPlacement>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); + AU.addRequired<RegAllocPriorityAdvisorAnalysis>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -279,16 +281,28 @@ void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); } void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. - const unsigned Size = LI->getSize(); const Register Reg = LI->reg(); assert(Reg.isVirtual() && "Can only enqueue virtual registers"); - unsigned Prio; auto Stage = ExtraInfo->getOrInitStage(Reg); if (Stage == RS_New) { Stage = RS_Assign; ExtraInfo->setStage(Reg, Stage); } + + unsigned Ret = PriorityAdvisor->getPriority(*LI); + + // The virtual register number is a tie breaker for same-sized ranges. + // Give lower vreg numbers higher priority to assign them first. + CurQueue.push(std::make_pair(Ret, ~Reg)); +} + +unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const { + const unsigned Size = LI.getSize(); + const Register Reg = LI.reg(); + unsigned Prio; + LiveRangeStage Stage = RA.getExtraInfo().getStage(LI); + if (Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. @@ -304,23 +318,24 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. 
const TargetRegisterClass &RC = *MRI->getRegClass(Reg); - bool ForceGlobal = !ReverseLocalAssignment && - (Size / SlotIndex::InstrDist) > - (2 * RegClassInfo.getNumAllocatableRegs(&RC)); + bool ForceGlobal = RC.GlobalPriority || + (!ReverseLocalAssignment && + (Size / SlotIndex::InstrDist) > + (2 * RegClassInfo.getNumAllocatableRegs(&RC))); unsigned GlobalBit = 0; - if (Stage == RS_Assign && !ForceGlobal && !LI->empty() && - LIS->intervalIsInOneMBB(*LI)) { + if (Stage == RS_Assign && !ForceGlobal && !LI.empty() && + LIS->intervalIsInOneMBB(LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of // global interference and other constraints. if (!ReverseLocalAssignment) - Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex()); else { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); + Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex()); } } else { // Allocate global and split ranges in long->short order. Long ranges that @@ -329,6 +344,22 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { Prio = Size; GlobalBit = 1; } + + // Priority bit layout: + // 31 RS_Assign priority + // 30 Preference priority + // if (RegClassPriorityTrumpsGlobalness) + // 29-25 AllocPriority + // 24 GlobalBit + // else + // 29 Global bit + // 28-24 AllocPriority + // 0-23 Size/Instr distance + + // Clamp the size to fit with the priority masking scheme + Prio = std::min(Prio, (unsigned)maxUIntN(24)); + assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow"); + if (RegClassPriorityTrumpsGlobalness) Prio |= RC.AllocationPriority << 25 | GlobalBit << 24; else @@ -341,9 +372,8 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { if (VRM->hasKnownPreference(Reg)) Prio |= (1u << 30); } - // The virtual register number is a tie breaker for same-sized ranges. - // Give lower vreg numbers higher priority to assign them first. 
- CurQueue.push(std::make_pair(Prio, ~Reg)); + + return Prio; } const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } @@ -493,7 +523,7 @@ bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const { return !Matrix->isPhysRegUsed(PhysReg); } -Optional<unsigned> +std::optional<unsigned> RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const { @@ -506,7 +536,7 @@ RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg, if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); - return None; + return std::nullopt; } // It is normal for register classes to have a long tail of registers with @@ -651,7 +681,7 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, assert(T < GroupSize && "Array overflow"); TBS[T] = Number; if (++T == GroupSize) { - SpillPlacer->addLinks(makeArrayRef(TBS, T)); + SpillPlacer->addLinks(ArrayRef(TBS, T)); T = 0; } continue; @@ -680,13 +710,13 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, BCS[B].Exit = SpillPlacement::PrefSpill; if (++B == GroupSize) { - SpillPlacer->addConstraints(makeArrayRef(BCS, B)); + SpillPlacer->addConstraints(ArrayRef(BCS, B)); B = 0; } } - SpillPlacer->addConstraints(makeArrayRef(BCS, B)); - SpillPlacer->addLinks(makeArrayRef(TBS, T)); + SpillPlacer->addConstraints(ArrayRef(BCS, B)); + SpillPlacer->addLinks(ArrayRef(TBS, T)); return true; } @@ -727,7 +757,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. - auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo); if (Cand.PhysReg) { if (!addThroughConstraints(Cand.Intf, NewBlocks)) return false; @@ -1227,6 +1257,55 @@ static unsigned getNumAllocatableRegsForConstraints( return RCI.getNumAllocatableRegs(ConstrainedRC); } +static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const MachineInstr &MI, Register Reg) { + LaneBitmask Mask; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0 && MO.isUse()) { + Mask |= MRI.getMaxLaneMaskForVReg(Reg); + continue; + } + + LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + if (MO.isDef()) { + if (!MO.isUndef()) + Mask |= ~SubRegMask; + } else + Mask |= SubRegMask; + } + + return Mask; +} + +/// Return true if \p MI at \P Use reads a subset of the lanes live in \p +/// VirtReg. +static bool readsLaneSubset(const MachineRegisterInfo &MRI, + const MachineInstr *MI, const LiveInterval &VirtReg, + const TargetRegisterInfo *TRI, SlotIndex Use) { + // Early check the common case. + if (MI->isCopy() && + MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg()) + return false; + + // FIXME: We're only considering uses, but should be consider defs too? + LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg()); + + LaneBitmask LiveAtMask; + for (const LiveInterval::SubRange &S : VirtReg.subranges()) { + if (S.liveAt(Use)) + LiveAtMask |= S.LaneMask; + } + + // If the live lanes aren't different from the lanes used by the instruction, + // this doesn't help. 
+ return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any(); +} + /// tryInstructionSplit - Split a live range around individual instructions. /// This is normally not worthwhile since the spiller is doing essentially the /// same thing. However, when the live range is in a constrained register @@ -1239,8 +1318,13 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &NewVRegs) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); // There is no point to this if there are no larger sub-classes. - if (!RegClassInfo.isProperSubClass(CurRC)) - return 0; + + bool SplitSubClass = true; + if (!RegClassInfo.isProperSubClass(CurRC)) { + if (!VirtReg.hasSubRanges()) + return 0; + SplitSubClass = false; + } // Always enable split spill mode, since we're effectively spilling to a // register. @@ -1263,14 +1347,19 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, // Otherwise, splitting just inserts uncoalescable copies that do not help // the allocation. for (const SlotIndex Use : Uses) { - if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) { if (MI->isFullCopy() || - SuperRCNumAllocatableRegs == - getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, - TII, TRI, RegClassInfo)) { + (SplitSubClass && + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, + TII, TRI, RegClassInfo)) || + // TODO: Handle split for subranges with subclass constraints? + (!SplitSubClass && VirtReg.hasSubRanges() && + !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) { LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } + } SE->openIntv(); SlotIndex SegStart = SE->enterIntvBefore(Use); SlotIndex SegStop = SE->leaveIntvAfter(Use); @@ -2113,7 +2202,7 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { Reg = RecoloringCandidates.pop_back_val(); // We cannot recolor physical register. - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) continue; // This may be a skipped class @@ -2207,7 +2296,7 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { for (const LiveInterval *LI : SetOfBrokenHints) { - assert(Register::isVirtualRegister(LI->reg()) && + assert(LI->reg().isVirtual() && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). // Ignore those. @@ -2369,11 +2458,25 @@ RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) { }; for (MachineInstr &MI : MBB) { if (MI.isCopy()) { - MachineOperand &Dest = MI.getOperand(0); - MachineOperand &Src = MI.getOperand(1); - if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() && - Src.getReg().isVirtual()) - ++Stats.Copies; + const MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Src = MI.getOperand(1); + Register SrcReg = Src.getReg(); + Register DestReg = Dest.getReg(); + // Only count `COPY`s with a virtual register as source or destination. 
+ if (SrcReg.isVirtual() || DestReg.isVirtual()) { + if (SrcReg.isVirtual()) { + SrcReg = VRM->getPhys(SrcReg); + if (Src.getSubReg()) + SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg()); + } + if (DestReg.isVirtual()) { + DestReg = VRM->getPhys(DestReg); + if (Dest.getSubReg()) + DestReg = TRI->getSubReg(DestReg, Dest.getSubReg()); + } + if (SrcReg != DestReg) + ++Stats.Copies; + } continue; } @@ -2540,6 +2643,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraInfo.emplace(); EvictAdvisor = getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this); + PriorityAdvisor = + getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this); VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h index 483f59ed8e8e..e0ac88c0aeb9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h @@ -15,6 +15,7 @@ #include "InterferenceCache.h" #include "RegAllocBase.h" #include "RegAllocEvictionAdvisor.h" +#include "RegAllocPriorityAdvisor.h" #include "SpillPlacement.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" @@ -79,7 +80,7 @@ public: unsigned NextCascade = 1; public: - ExtraRegInfo() = default; + ExtraRegInfo() {} ExtraRegInfo(const ExtraRegInfo &) = delete; LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; } @@ -147,10 +148,17 @@ public: size_t getQueueSize() const { return Queue.size(); } // end (interface to eviction advisers) + // Interface to priority advisers + bool getRegClassPriorityTrumpsGlobalness() const { + return RegClassPriorityTrumpsGlobalness; + } + bool getReverseLocalAssignment() const { return ReverseLocalAssignment; } + // end (interface to priority advisers) + private: // Convenient shortcuts. using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; - using SmallLISet = SmallPtrSet<const LiveInterval *, 4>; + using SmallLISet = SmallSetVector<const LiveInterval *, 4>; // We need to track all tentative recolorings so we can roll back any // successful and unsuccessful recoloring attempts. @@ -177,9 +185,11 @@ private: std::unique_ptr<Spiller> SpillerInstance; PQueue Queue; std::unique_ptr<VirtRegAuxInfo> VRAI; - Optional<ExtraRegInfo> ExtraInfo; + std::optional<ExtraRegInfo> ExtraInfo; std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor; + std::unique_ptr<RegAllocPriorityAdvisor> PriorityAdvisor; + // Enum CutOffStage to keep a track whether the register allocation failed // because of the cutoffs encountered in last chance recoloring. // Note: This is used as bitmask. New value should be next power of 2. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp new file mode 100644 index 000000000000..b3a13cc92316 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp @@ -0,0 +1,114 @@ +//===- RegAllocPriorityAdvisor.cpp - live ranges priority advisor ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the default priority advisor and of the Analysis pass. +// +//===----------------------------------------------------------------------===// + +#include "RegAllocPriorityAdvisor.h" +#include "RegAllocGreedy.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; + +static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode( + "regalloc-enable-priority-advisor", cl::Hidden, + cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default), + cl::desc("Enable regalloc advisor mode"), + cl::values( + clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default, + "default", "Default"), + clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release, + "release", "precompiled"), + clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development, + "development", "for training"))); + +char RegAllocPriorityAdvisorAnalysis::ID = 0; +INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority", + "Regalloc priority policy", false, true) + +namespace { +class DefaultPriorityAdvisorAnalysis final + : public RegAllocPriorityAdvisorAnalysis { +public: + DefaultPriorityAdvisorAnalysis(bool NotAsRequested) + : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default), + NotAsRequested(NotAsRequested) {} + + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Default; + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<SlotIndexes>(); + RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); + } + std::unique_ptr<RegAllocPriorityAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + return std::make_unique<DefaultPriorityAdvisor>( + MF, RA, &getAnalysis<SlotIndexes>()); + } + bool doInitialization(Module &M) override { + if (NotAsRequested) + M.getContext().emitError("Requested regalloc priority advisor analysis " + "could be created. 
Using default"); + return RegAllocPriorityAdvisorAnalysis::doInitialization(M); + } + const bool NotAsRequested; +}; +} // namespace + +template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() { + Pass *Ret = nullptr; + switch (Mode) { + case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default: + Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false); + break; + case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development: +#if defined(LLVM_HAVE_TFLITE) + Ret = createDevelopmentModePriorityAdvisor(); +#endif + break; + case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release: +#if defined(LLVM_HAVE_TF_AOT_REGALLOCPRIORITYMODEL) + Ret = createReleaseModePriorityAdvisor(); +#endif + break; + } + if (Ret) + return Ret; + return new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ true); +} + +StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const { + switch (getAdvisorMode()) { + case AdvisorMode::Default: + return "Default Regalloc Priority Advisor"; + case AdvisorMode::Release: + return "Release mode Regalloc Priority Advisor"; + case AdvisorMode::Development: + return "Development mode Regalloc Priority Advisor"; + } + llvm_unreachable("Unknown advisor kind"); +} + +RegAllocPriorityAdvisor::RegAllocPriorityAdvisor(const MachineFunction &MF, + const RAGreedy &RA, + SlotIndexes *const Indexes) + : RA(RA), LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()), + MRI(&VRM->getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()), + RegClassInfo(RA.getRegClassInfo()), Indexes(Indexes), + RegClassPriorityTrumpsGlobalness( + RA.getRegClassPriorityTrumpsGlobalness()), + ReverseLocalAssignment(RA.getReverseLocalAssignment()) {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h new file mode 100644 index 000000000000..1e9fa967214c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h @@ -0,0 +1,96 @@ +//===- RegAllocPriorityAdvisor.h - live ranges priority advisor -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H +#define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H + +#include "RegAllocEvictionAdvisor.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Pass.h" + +namespace llvm { + +class MachineFunction; +class VirtRegMap; +class RAGreedy; + +/// Interface to the priority advisor, which is responsible for prioritizing +/// live ranges. +class RegAllocPriorityAdvisor { +public: + RegAllocPriorityAdvisor(const RegAllocPriorityAdvisor &) = delete; + RegAllocPriorityAdvisor(RegAllocPriorityAdvisor &&) = delete; + virtual ~RegAllocPriorityAdvisor() = default; + + /// Find the priority value for a live range. A float value is used since ML + /// prefers it. 
+ virtual unsigned getPriority(const LiveInterval &LI) const = 0; + + RegAllocPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *const Indexes); + +protected: + const RAGreedy &RA; + LiveIntervals *const LIS; + VirtRegMap *const VRM; + MachineRegisterInfo *const MRI; + const TargetRegisterInfo *const TRI; + const RegisterClassInfo &RegClassInfo; + SlotIndexes *const Indexes; + const bool RegClassPriorityTrumpsGlobalness; + const bool ReverseLocalAssignment; +}; + +class DefaultPriorityAdvisor : public RegAllocPriorityAdvisor { +public: + DefaultPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *const Indexes) + : RegAllocPriorityAdvisor(MF, RA, Indexes) {} + +private: + unsigned getPriority(const LiveInterval &LI) const override; +}; + +class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { +public: + enum class AdvisorMode : int { Default, Release, Development }; + + RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) + : ImmutablePass(ID), Mode(Mode){}; + static char ID; + + /// Get an advisor for the given context (i.e. machine function, etc) + virtual std::unique_ptr<RegAllocPriorityAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; + AdvisorMode getAdvisorMode() const { return Mode; } + virtual void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref<float()> GetReward){}; + +protected: + // This analysis preserves everything, and subclasses may have additional + // requirements. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + StringRef getPassName() const override; + const AdvisorMode Mode; +}; + +/// Specialization for the API used by the analysis infrastructure to create +/// an instance of the priority advisor. +template <> Pass *callDefaultCtor<RegAllocPriorityAdvisorAnalysis>(); + +RegAllocPriorityAdvisorAnalysis *createReleaseModePriorityAdvisor(); + +RegAllocPriorityAdvisorAnalysis *createDevelopmentModePriorityAdvisor(); + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp index 17e3eeef664b..e420283dfcfa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp @@ -14,8 +14,6 @@ #include "RegAllocScore.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/STLForwardCompat.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/ilist_iterator.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp index de851ffc7fdc..27ed17b9f4f6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp @@ -79,7 +79,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { // FIXME: This was probably a copy to a virtual register that does have a // type we could use. 
return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT()); @@ -97,7 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterClass & RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const { - assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); + assert(Reg.isPhysical() && "Reg must be a physreg"); const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); if (RegRCIt != PhysRegMinimalRCs.end()) return *RegRCIt->second; @@ -449,6 +449,9 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n"); continue; } + LLT Ty = MRI.getType(MO.getReg()); + if (!Ty.isValid()) + continue; assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != 0 && "Invalid mapping"); @@ -490,7 +493,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. @@ -601,6 +604,7 @@ bool RegisterBankInfo::InstructionMapping::verify( const MachineFunction &MF = *MI.getMF(); const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); (void)RBI; + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { const MachineOperand &MO = MI.getOperand(Idx); @@ -612,6 +616,9 @@ bool RegisterBankInfo::InstructionMapping::verify( Register Reg = MO.getReg(); if (!Reg) continue; + LLT Ty = MRI.getType(Reg); + if (!Ty.isValid()) + continue; assert(getOperandMapping(Idx).isValid() && "We must have a mapping for reg operands"); const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp index 374fcc9a6014..fba8c35ecec2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -52,22 +52,43 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { Update = true; } - // Does this MF have different CSRs? - assert(TRI && "no register info set"); + // Test if CSRs have changed from the previous function. + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); + bool CSRChanged = true; + if (!Update) { + CSRChanged = false; + size_t LastSize = LastCalleeSavedRegs.size(); + for (unsigned I = 0;; ++I) { + if (CSR[I] == 0) { + CSRChanged = I != LastSize; + break; + } + if (I >= LastSize) { + CSRChanged = true; + break; + } + if (CSR[I] != LastCalleeSavedRegs[I]) { + CSRChanged = true; + break; + } + } + } // Get the callee saved registers. - const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs(); - if (Update || CSR != CalleeSavedRegs) { + if (CSRChanged) { + LastCalleeSavedRegs.clear(); // Build a CSRAlias map. Every CSR alias saves the last // overlapping CSR. 
CalleeSavedAliases.assign(TRI->getNumRegs(), 0); - for (const MCPhysReg *I = CSR; *I; ++I) + for (const MCPhysReg *I = CSR; *I; ++I) { for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) CalleeSavedAliases[*AI] = *I; + LastCalleeSavedRegs.push_back(*I); + } Update = true; } - CalleeSavedRegs = CSR; // Even if CSR list is same, we could have had a different allocation order // if ignoreCSRForAllocationOrder is evaluated differently. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index 8a6f823c8a0c..ab1215974fc5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -199,12 +199,7 @@ namespace { DenseMap<Register, unsigned long> LargeLIVisitCounter; /// Recursively eliminate dead defs in DeadDefs. - void eliminateDeadDefs(); - - /// allUsesAvailableAt - Return true if all registers used by OrigMI at - /// OrigIdx are also available with the same value at UseIdx. - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx); + void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr); /// LiveRangeEdit callback for eliminateDeadDefs(). void LRE_WillEraseInstruction(MachineInstr *MI) override; @@ -418,24 +413,24 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) -LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri, - const MachineInstr *MI, Register &Src, - Register &Dst, unsigned &SrcSub, - unsigned &DstSub) { - if (MI->isCopy()) { - Dst = MI->getOperand(0).getReg(); - DstSub = MI->getOperand(0).getSubReg(); - Src = MI->getOperand(1).getReg(); - SrcSub = MI->getOperand(1).getSubReg(); - } else if (MI->isSubregToReg()) { - Dst = MI->getOperand(0).getReg(); - DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), - MI->getOperand(3).getImm()); - Src = MI->getOperand(2).getReg(); - SrcSub = MI->getOperand(2).getSubReg(); - } else - return false; - return true; +[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri, + const MachineInstr *MI, Register &Src, + Register &Dst, unsigned &SrcSub, + unsigned &DstSub) { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else + return false; + return true; } /// Return true if this block should be vacated by the coalescer to eliminate @@ -467,8 +462,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { Partial = SrcSub || DstSub; // If one register is a physreg, it must be Dst. - if (Register::isPhysicalRegister(Src)) { - if (Register::isPhysicalRegister(Dst)) + if (Src.isPhysical()) { + if (Dst.isPhysical()) return false; std::swap(Src, Dst); std::swap(SrcSub, DstSub); @@ -477,7 +472,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); - if (Register::isPhysicalRegister(Dst)) { + if (Dst.isPhysical()) { // Eliminate DstSub on a physreg. 
if (DstSub) { Dst = TRI.getSubReg(Dst, DstSub); @@ -535,16 +530,15 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants - assert(Register::isVirtualRegister(Src) && "Src must be virtual"); - assert(!(Register::isPhysicalRegister(Dst) && DstSub) && - "Cannot have a physical SubIdx"); + assert(Src.isVirtual() && "Src must be virtual"); + assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx"); SrcReg = Src; DstReg = Dst; return true; } bool CoalescerPair::flip() { - if (Register::isPhysicalRegister(DstReg)) + if (DstReg.isPhysical()) return false; std::swap(SrcReg, DstReg); std::swap(SrcIdx, DstIdx); @@ -603,20 +597,16 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -void RegisterCoalescer::eliminateDeadDefs() { +void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) { + if (Edit) { + Edit->eliminateDeadDefs(DeadDefs); + return; + } SmallVector<Register, 8> NewRegs; LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this).eliminateDeadDefs(DeadDefs); } -bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI, - SlotIndex OrigIdx, - SlotIndex UseIdx) { - SmallVector<Register, 8> NewRegs; - return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this) - .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx); -} - void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { // MI may be in WorkList. Make sure we don't visit it. ErasedInstrs.insert(MI); @@ -911,8 +901,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return { false, false }; - if (Register::isVirtualRegister(IntA.reg()) && - Register::isVirtualRegister(IntB.reg()) && + if (IntA.reg().isVirtual() && IntB.reg().isVirtual() && !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg()))) return { false, false }; if (NewMI != DefMI) { @@ -950,7 +939,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); - if (Register::isPhysicalRegister(NewReg)) + if (NewReg.isPhysical()) UseMO.substPhysReg(NewReg, *TRI); else UseMO.setReg(NewReg); @@ -1287,7 +1276,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); - if (Register::isPhysicalRegister(SrcReg)) + if (SrcReg.isPhysical()) return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); @@ -1306,8 +1295,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } if (!TII->isAsCheapAsAMove(*DefMI)) return false; - if (!TII->isTriviallyReMaterializable(*DefMI)) + + SmallVector<Register, 8> NewRegs; + LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this); + if (!Edit.checkRematerializable(ValNo, DefMI)) return false; + if (!definesFullReg(*DefMI, SrcReg)) return false; bool SawStore = false; @@ -1347,19 +1340,21 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { // Theoretically, some stack frame reference could exist. Just make sure // it hasn't actually happened. 
- assert(Register::isVirtualRegister(DstReg) && + assert(DstReg.isVirtual() && "Only expect to deal with virtual or physical registers"); } } - if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx)) + LiveRangeEdit::Remat RM(ValNo); + RM.OrigMI = DefMI; + if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true)) return false; DebugLoc DL = CopyMI->getDebugLoc(); MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = std::next(MachineBasicBlock::iterator(CopyMI)); - TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI); + Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI); MachineInstr &NewMI = *std::prev(MII); NewMI.setDebugLoc(DL); @@ -1379,8 +1374,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, TRI->getCommonSubClass(DefRC, DstRC); if (CommonRC != nullptr) { NewRC = CommonRC; + + // Instruction might contain "undef %0:subreg" as use operand: + // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ... + // + // Need to check all operands. + for (MachineOperand &MO : NewMI.operands()) { + if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) { + MO.setSubReg(0); + } + } + DstIdx = 0; - DefMO.setSubReg(0); DefMO.setIsUndef(false); // Only subregs can have def+undef. } } @@ -1398,12 +1403,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (MO.isReg()) { assert(MO.isImplicit() && "No explicit operands after implicit operands."); // Discard VReg implicit defs. - if (Register::isPhysicalRegister(MO.getReg())) + if (MO.getReg().isPhysical()) ImplicitOps.push_back(MO); } } - LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI); CopyMI->eraseFromParent(); ErasedInstrs.insert(CopyMI); @@ -1416,8 +1420,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, i != e; ++i) { MachineOperand &MO = NewMI.getOperand(i); if (MO.isReg() && MO.isDef()) { - assert(MO.isImplicit() && MO.isDead() && - Register::isPhysicalRegister(MO.getReg())); + assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical()); NewMIImplDefs.push_back(MO.getReg().asMCReg()); } } @@ -1520,7 +1523,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. - assert(Register::isPhysicalRegister(DstReg) && + assert(DstReg.isPhysical() && "Only expect virtual or physical registers in remat"); NewMI.getOperand(0).setIsDead(true); NewMI.addOperand(MachineOperand::CreateReg( @@ -1573,7 +1576,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, llvm::make_early_inc_range(MRI->use_operands(SrcReg))) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugInstr()) { - if (Register::isPhysicalRegister(DstReg)) + if (DstReg.isPhysical()) UseMO.substPhysReg(DstReg, *TRI); else UseMO.setReg(DstReg); @@ -1597,7 +1600,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // The source interval can become smaller because we removed a use. 
shrinkToUses(&SrcInt, &DeadDefs); if (!DeadDefs.empty()) - eliminateDeadDefs(); + eliminateDeadDefs(&Edit); } else { ToBeUpdated.insert(SrcReg); } @@ -1641,18 +1644,20 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { SlotIndex RegIndex = Idx.getRegSlot(); LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex); assert(Seg != nullptr && "No segment for defining instruction"); - if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) { - if (V->isPHIDef()) { - CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { - MachineOperand &MO = CopyMI->getOperand(i-1); - if (MO.isReg() && MO.isUse()) - CopyMI->removeOperand(i-1); - } - LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an " - "implicit def\n"); - return CopyMI; + VNInfo *V = DstLI.getVNInfoAt(Seg->end); + + // The source interval may also have been on an undef use, in which case the + // copy introduced a live value. + if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) { + CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { + MachineOperand &MO = CopyMI->getOperand(i-1); + if (MO.isReg() && MO.isUse()) + CopyMI->removeOperand(i-1); } + LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an " + "implicit def\n"); + return CopyMI; } // Remove any DstReg segments starting at the instruction. @@ -1744,7 +1749,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx) { - bool DstIsPhys = Register::isPhysicalRegister(DstReg); + bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { @@ -2103,6 +2108,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) << ")\n"); LIS->shrinkToUses(S, LI.reg()); + ShrinkMainRange = true; } LI.removeEmptySubRanges(); } @@ -2742,8 +2748,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } V.OtherVNI = OtherVNI; Val &OtherV = Other.Vals[OtherVNI->id]; - // Keep this value, check for conflicts when analyzing OtherVNI. - if (!OtherV.isAnalyzed()) + // Keep this value, check for conflicts when analyzing OtherVNI. Avoid + // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it + // is assigned. + if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1) return CR_Keep; // Both sides have been analyzed now. // Allow overlapping PHI values. Any real interference would show up in a @@ -2955,7 +2963,7 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { } OtherV.Pruned = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; } default: // This value number needs to go in the final joined live range. 
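Referring back to the DefaultPriorityAdvisor::getPriority() hunk in RegAllocGreedy.cpp earlier in this diff: the comment added there documents how the assignment-stage bit, the preference bit, the register class AllocationPriority, the global bit, and the clamped size share one 32-bit priority word. A self-contained sketch of that packing, with illustrative inputs rather than the allocator's real data structures (the split-range path, which skips these bits, is left out):

#include <algorithm>
#include <cassert>
#include <cstdio>

// Field positions follow the "Priority bit layout" comment in the hunk
// above; the function name and parameters are invented for the example.
unsigned packPriority(unsigned SizeOrDistance, unsigned AllocPriority,
                      bool GlobalRange, bool ClassPriorityTrumpsGlobalness,
                      bool HasPreference) {
  assert(AllocPriority < 32 && "allocation priority must fit in 5 bits");

  // Bits 0-23: size / instruction distance, clamped so it cannot overflow
  // into the fields packed above it.
  unsigned Prio = std::min(SizeOrDistance, (1u << 24) - 1);

  unsigned GlobalBit = GlobalRange ? 1u : 0u;
  if (ClassPriorityTrumpsGlobalness)
    Prio |= AllocPriority << 25 | GlobalBit << 24; // class priority outranks globalness
  else
    Prio |= GlobalBit << 29 | AllocPriority << 24; // globalness outranks class priority

  Prio |= 1u << 31;   // bit 31: range is in the initial assignment stage
  if (HasPreference)
    Prio |= 1u << 30; // bit 30: range has a known register preference
  return Prio;
}

int main() {
  // A short local range with a preference beats a long global range of the
  // same allocation priority, because the preference bit (30) outranks the
  // global bit (29) and the size field.
  unsigned A = packPriority(/*Size*/ 40, /*AllocPrio*/ 0, /*Global*/ false,
                            /*ClassTrumps*/ false, /*Pref*/ true);
  unsigned B = packPriority(/*Size*/ 4000, /*AllocPrio*/ 0, /*Global*/ true,
                            /*ClassTrumps*/ false, /*Pref*/ false);
  std::printf("A=0x%08x B=0x%08x A>B=%d\n", A, B, A > B);
  return 0;
}

Packing the fields this way lets the priority queue keep comparing plain unsigned values while still ranking stage and preference above raw range size.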
@@ -3398,7 +3406,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, if (LI != nullptr) dbgs() << "\t\t LHS = " << *LI << '\n'; }); - LLVM_FALLTHROUGH; + [[fallthrough]]; } case CR_Erase: { @@ -3406,8 +3414,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, assert(MI && "No instruction to erase"); if (MI->isCopy()) { Register Reg = MI->getOperand(1).getReg(); - if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() && - Reg != CP.getDstReg()) + if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg()) ShrinkRegs.push_back(Reg); } ErasedInstrs.insert(MI); @@ -3885,8 +3892,7 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { Register SrcReg = Copy->getOperand(1).getReg(); Register DstReg = Copy->getOperand(0).getReg(); - if (Register::isPhysicalRegister(SrcReg) || - Register::isPhysicalRegister(DstReg)) + if (SrcReg.isPhysical() || DstReg.isPhysical()) return false; return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) @@ -3975,8 +3981,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (OtherReg == SrcReg) OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. - if (Register::isPhysicalRegister(OtherReg) || - isTerminalReg(OtherReg, MI, MRI)) + if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI)) continue; // Check that OtherReg interfere with DstReg. if (LIS->getInterval(OtherReg).overlaps(DstLI)) { @@ -4107,7 +4112,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // calls if (fn.exposesReturnsTwice()) { LLVM_DEBUG( - dbgs() << "* Skipped as it exposes funcions that returns twice.\n"); + dbgs() << "* Skipped as it exposes functions that returns twice.\n"); return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp index b14a36e4eeb4..d4c29f96a4f9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp @@ -361,8 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { assert(isBottomClosed() && "need bottom-up tracking to intialize."); for (const RegisterMaskPair &Pair : P.LiveOutRegs) { Register RegUnit = Pair.RegUnit; - if (Register::isVirtualRegister(RegUnit) - && !RPTracker.hasUntiedDef(RegUnit)) + if (RegUnit.isVirtual() && !RPTracker.hasUntiedDef(RegUnit)) increaseSetPressure(LiveThruPressure, *MRI, RegUnit, LaneBitmask::getNone(), Pair.LaneMask); } @@ -608,8 +607,8 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. 
Register RegUnit = I->RegUnit; - if (Register::isVirtualRegister(RegUnit) && - AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) + if (RegUnit.isVirtual() && AddFlagsMI != nullptr && + (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); LaneBitmask ActualDef = I->LaneMask & LiveAfter; @@ -634,7 +633,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, if (AddFlagsMI != nullptr) { for (const RegisterMaskPair &P : DeadDefs) { Register RegUnit = P.RegUnit; - if (!Register::isVirtualRegister(RegUnit)) + if (!RegUnit.isVirtual()) continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getDeadSlot()); @@ -843,7 +842,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, if (TrackUntiedDefs) { for (const RegisterMaskPair &Def : RegOpers.Defs) { Register RegUnit = Def.RegUnit; - if (Register::isVirtualRegister(RegUnit) && + if (RegUnit.isVirtual() && (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp index 289d31be2d2d..8d10a5558315 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -184,7 +184,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) + if (!Reg.isPhysical() || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -308,7 +308,7 @@ Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; - if (Register::isVirtualRegister(MO.getReg())) { + if (MO.getReg().isVirtual()) { if (MO.isDef()) isVirtDefInsn = true; else if (MO.isKill()) @@ -394,6 +394,13 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, Used.accumulate(*std::next(From)); } if (FoundTo) { + // Don't search to FrameSetup instructions if we were searching from + // Non-FrameSetup instructions. Otherwise, the spill position may point + // before FrameSetup instructions. + if (!From->getFlag(MachineInstr::FrameSetup) && + MI.getFlag(MachineInstr::FrameSetup)) + break; + if (Survivor == 0 || !Used.available(Survivor)) { MCPhysReg AvilableReg = 0; for (MCPhysReg Reg : AllocationOrder) { @@ -413,7 +420,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // be usefull for this other vreg as well later. bool FoundVReg = false; for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && MO.getReg().isVirtual()) { FoundVReg = true; break; } @@ -499,14 +506,14 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, ": Cannot scavenge register without an emergency " "spill slot!"); } - TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI, Register()); MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). 
- TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI, Register()); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); @@ -526,7 +533,7 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !Register::isVirtualRegister(MO.getReg())) + !MO.getReg().isVirtual()) for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) Candidates.reset(*AI); } @@ -704,7 +711,7 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, // We only care about virtual registers and ignore virtual registers // created by the target callbacks in the process (those will be handled // in a scavenging round). - if (!Register::isVirtualRegister(Reg) || + if (!Reg.isVirtual() || Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; if (!MO.readsReg()) @@ -724,7 +731,7 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, continue; Register Reg = MO.getReg(); // Only vregs, no newly created vregs (see above). - if (!Register::isVirtualRegister(Reg) || + if (!Reg.isVirtual() || Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; // We have to look at all operands anyway so we can precalculate here @@ -743,7 +750,7 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, } #ifndef NDEBUG for (const MachineOperand &MO : MBB.front().operands()) { - if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; assert(!MO.isInternalRead() && "Cannot assign inside bundles"); assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 9d9cdf9edbb3..51bac3fc0a23 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -63,7 +63,7 @@ ArrayRef<uint32_t> PhysicalRegisterUsageInfo::getRegUsageInfo(const Function &FP) { auto It = RegMasks.find(&FP); if (It != RegMasks.end()) - return makeArrayRef<uint32_t>(It->second); + return ArrayRef<uint32_t>(It->second); return ArrayRef<uint32_t>(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp index 01886e40a4a3..feb31e59f5fd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -89,7 +89,7 @@ static bool reduceDbgValsForwardScan(MachineBasicBlock &MBB) { for (auto &MI : MBB) { if (MI.isDebugValue()) { - DebugVariable Var(MI.getDebugVariable(), NoneType(), + DebugVariable Var(MI.getDebugVariable(), std::nullopt, MI.getDebugLoc()->getInlinedAt()); auto VMI = VariableMap.find(Var); // Just stop tracking this variable, until we cover DBG_VALUE_LIST. 
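Several hunks in this diff, for example RegAllocEvictionAdvisor.h and RemoveRedundantDebugValues.cpp above and SafeStack.cpp just below, replace llvm::Optional, None, NoneType, and getPointer() with std::optional, std::nullopt, and &*. A small sketch of the equivalent std::optional idioms, with invented types (Limit, Analysis) purely for illustration:

#include <cstdio>
#include <optional>

struct Limit {
  unsigned Value;
};

// "return None" becomes "return std::nullopt".
std::optional<Limit> computeLimit(unsigned Budget) {
  if (Budget == 0)
    return std::nullopt; // nothing affordable
  return Limit{Budget / 2};
}

struct Analysis {
  int Data = 42;
};

int main() {
  if (std::optional<Limit> L = computeLimit(8))
    std::printf("limit = %u\n", L->Value);

  // Lazily construct a value in-place, then take a plain pointer to it:
  // Optional::getPointer() becomes &*Opt on std::optional.
  std::optional<Analysis> Lazy;
  Lazy.emplace();
  Analysis *A = &*Lazy;
  std::printf("data = %d\n", A->Data);
  return 0;
}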
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 466022ae0ac1..05bbd1a2d03b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -130,7 +130,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { return false; // Create a new VReg for each class. - unsigned Reg = LI.reg(); + Register Reg = LI.reg(); const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); SmallVector<LiveInterval*, 4> Intervals; Intervals.push_back(&LI); @@ -175,7 +175,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes, // across subranges when they are affected by the same MachineOperand. const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); Classes.grow(NumComponents); - unsigned Reg = LI.reg(); + Register Reg = LI.reg(); for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { if (!MO.isDef() && !MO.readsReg()) continue; @@ -304,7 +304,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (size_t I = 0, E = Intervals.size(); I < E; ++I) { LiveInterval &LI = *Intervals[I]; - unsigned Reg = LI.reg(); + Register Reg = LI.reg(); LI.removeEmptySubRanges(); @@ -391,7 +391,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { // there can't be any further splitting. bool Changed = false; for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { - unsigned Reg = Register::index2VirtReg(I); + Register Reg = Register::index2VirtReg(I); if (!LIS->hasInterval(Reg)) continue; LiveInterval &LI = LIS->getInterval(Reg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp index 0f73973c8a51..0ad6ef84220a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -66,6 +66,8 @@ namespace { LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n'); ++NumFunctionsReset; MF.reset(); + MF.initTargetMachineFunctionInfo(MF.getSubtarget()); + if (EmitFallbackDiag) { const Function &F = MF.getFunction(); DiagnosticInfoISelFallback DiagFallback(F); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp index 00a551ade213..bcad7a3f24da 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp @@ -67,6 +67,7 @@ #include <algorithm> #include <cassert> #include <cstdint> +#include <optional> #include <string> #include <utility> @@ -896,7 +897,7 @@ public: DominatorTree *DT; bool ShouldPreserveDominatorTree; - Optional<DominatorTree> LazilyComputedDomTree; + std::optional<DominatorTree> LazilyComputedDomTree; // Do we already have a DominatorTree avaliable from the previous pass? // Note that we should *NOT* require it, to avoid the case where we end up @@ -907,7 +908,7 @@ public: } else { // Otherwise, we need to compute it. 
LazilyComputedDomTree.emplace(F); - DT = LazilyComputedDomTree.getPointer(); + DT = &*LazilyComputedDomTree; ShouldPreserveDominatorTree = false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp new file mode 100644 index 000000000000..dd70a2f23e45 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp @@ -0,0 +1,80 @@ +//===- SanitizerBinaryMetadata.cpp +//----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of SanitizerBinaryMetadata. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include <algorithm> + +using namespace llvm; + +namespace { +class MachineSanitizerBinaryMetadata : public MachineFunctionPass { +public: + static char ID; + + MachineSanitizerBinaryMetadata(); + bool runOnMachineFunction(MachineFunction &F) override; +}; +} // namespace + +INITIALIZE_PASS(MachineSanitizerBinaryMetadata, "machine-sanmd", + "Machine Sanitizer Binary Metadata", false, false) + +char MachineSanitizerBinaryMetadata::ID = 0; +char &llvm::MachineSanitizerBinaryMetadataID = + MachineSanitizerBinaryMetadata::ID; + +MachineSanitizerBinaryMetadata::MachineSanitizerBinaryMetadata() + : MachineFunctionPass(ID) { + initializeMachineSanitizerBinaryMetadataPass( + *PassRegistry::getPassRegistry()); +} + +bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) { + MDNode *MD = MF.getFunction().getMetadata(LLVMContext::MD_pcsections); + if (!MD) + return false; + const auto &Section = *cast<MDString>(MD->getOperand(0)); + if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection)) + return false; + auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1)); + // Assume it currently only has features. + assert(AuxMDs.getNumOperands() == 1); + auto *Features = cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue(); + if (!Features->getUniqueInteger()[kSanitizerBinaryMetadataUARBit]) + return false; + // Calculate size of stack args for the function. + int64_t Size = 0; + uint64_t Align = 0; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + for (int i = -1; i >= (int)-MFI.getNumFixedObjects(); --i) { + Size = std::max(Size, MFI.getObjectOffset(i) + MFI.getObjectSize(i)); + Align = std::max(Align, MFI.getObjectAlign(i).value()); + } + Size = (Size + Align - 1) & ~(Align - 1); + auto &F = MF.getFunction(); + IRBuilder<> IRB(F.getContext()); + MDBuilder MDB(F.getContext()); + // Keep the features and append size of stack args to the metadata. 
+ F.setMetadata(LLVMContext::MD_pcsections, + MDB.createPCSections( + {{Section.getString(), {Features, IRB.getInt32(Size)}}})); + return false; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 4fc9399c2b9e..1b213e87e75c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAGInstrs.h" + #include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" @@ -53,7 +54,6 @@ #include <algorithm> #include <cassert> #include <iterator> -#include <string> #include <utility> #include <vector> @@ -84,6 +84,12 @@ static cl::opt<unsigned> ReductionSize( cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2.")); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static cl::opt<bool> SchedPrintCycles( + "sched-print-cycles", cl::Hidden, cl::init(false), + cl::desc("Report top/bottom cycles when dumping SUnit instances")); +#endif + static unsigned getReductionSize() { // Always reduce a huge region with half of the elements, except // when user sets this number explicitly. @@ -92,12 +98,12 @@ static unsigned getReductionSize() { return ReductionSize; } -static void dumpSUList(ScheduleDAGInstrs::SUList &L) { +static void dumpSUList(const ScheduleDAGInstrs::SUList &L) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << "{ "; - for (const SUnit *su : L) { - dbgs() << "SU(" << su->NodeNum << ")"; - if (su != L.back()) + for (const SUnit *SU : L) { + dbgs() << "SU(" << SU->NodeNum << ")"; + if (SU != L.back()) dbgs() << ", "; } dbgs() << "}\n"; @@ -125,7 +131,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL) { - auto allMMOsOkay = [&]() { + auto AllMMOsOkay = [&]() { for (const MachineMemOperand *MMO : MI->memoperands()) { // TODO: Figure out whether isAtomic is really necessary (see D57601). 
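The new SanitizerBinaryMetadata pass above rounds the accumulated fixed-object size up to the largest alignment it saw, using the classic (Size + Align - 1) & ~(Align - 1) idiom. That only works when Align is a power of two; a self-contained sketch with a few worked values:

#include <cassert>
#include <cstdint>

// Round Size up to the next multiple of Align (Align must be a power of two).
uint64_t roundUpTo(uint64_t Size, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment expected");
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  assert(roundUpTo(0, 8) == 0);
  assert(roundUpTo(1, 8) == 8);
  assert(roundUpTo(24, 8) == 24);
  assert(roundUpTo(25, 16) == 32);
  return 0;
}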
if (MMO->isVolatile() || MMO->isAtomic()) @@ -147,7 +153,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, return false; bool MayAlias = PSV->mayAlias(&MFI); - Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); + Objects.emplace_back(PSV, MayAlias); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; if (!getUnderlyingObjectsForCodeGen(V, Objs)) @@ -155,7 +161,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, for (Value *V : Objs) { assert(isIdentifiedObject(V)); - Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); + Objects.emplace_back(V, true); } } else return false; @@ -163,7 +169,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, return true; }; - if (!allMMOsOkay()) { + if (!AllMMOsOkay()) { Objects.clear(); return false; } @@ -205,9 +211,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { for (const MachineOperand &MO : ExitMI->operands()) { if (!MO.isReg() || MO.isDef()) continue; Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Reg.isVirtual() && MO.readsReg()) { addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); } } @@ -676,9 +682,9 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { assert(BarrierChain != nullptr); - for (auto &I : map) { - SUList &sus = I.second; - for (auto *SU : sus) + for (auto &[V, SUs] : map) { + (void)V; + for (auto *SU : SUs) SU->addPredBarrier(BarrierChain); } map.clear(); @@ -793,7 +799,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, MII != MIE; --MII) { MachineInstr &MI = *std::prev(MII); if (DbgMI) { - DbgValues.push_back(std::make_pair(DbgMI, &MI)); + DbgValues.emplace_back(DbgMI, &MI); DbgMI = nullptr; } @@ -839,9 +845,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { addPhysRegDeps(SU, j); - } else if (Register::isVirtualRegister(Reg)) { + } else if (Reg.isVirtual()) { HasVRegDef = true; addVRegDefDeps(SU, j); } @@ -856,9 +862,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { addPhysRegDeps(SU, j); - } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Reg.isVirtual() && MO.readsReg()) { addVRegUseDeps(SU, j); } } @@ -1019,21 +1025,21 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) { } void ScheduleDAGInstrs::Value2SUsMap::dump() { - for (auto &Itr : *this) { - if (Itr.first.is<const Value*>()) { - const Value *V = Itr.first.get<const Value*>(); + for (const auto &[ValType, SUs] : *this) { + if (ValType.is<const Value*>()) { + const Value *V = ValType.get<const Value*>(); if (isa<UndefValue>(V)) dbgs() << "Unknown"; else V->printAsOperand(dbgs()); } - else if (Itr.first.is<const PseudoSourceValue*>()) - dbgs() << Itr.first.get<const PseudoSourceValue*>(); + else if (ValType.is<const PseudoSourceValue*>()) + dbgs() << ValType.get<const PseudoSourceValue*>(); else llvm_unreachable("Unknown Value type."); dbgs() << " : "; - dumpSUList(Itr.second); + dumpSUList(SUs); } } @@ -1045,12 +1051,16 @@ void 
ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, // Insert all SU's NodeNums into a vector and sort it. std::vector<unsigned> NodeNums; NodeNums.reserve(stores.size() + loads.size()); - for (auto &I : stores) - for (auto *SU : I.second) + for (const auto &[V, SUs] : stores) { + (void)V; + for (const auto *SU : SUs) NodeNums.push_back(SU->NodeNum); - for (auto &I : loads) - for (auto *SU : I.second) + } + for (const auto &[V, SUs] : loads) { + (void)V; + for (const auto *SU : SUs) NodeNums.push_back(SU->NodeNum); + } llvm::sort(NodeNums); // The N last elements in NodeNums will be removed, and the SU with @@ -1154,6 +1164,9 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dumpNodeName(SU); + if (SchedPrintCycles) + dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle + << ", BottomReadyCycle = " << SU.BotReadyCycle << "]"; dbgs() << ": "; SU.getInstr()->dump(); #endif @@ -1308,7 +1321,7 @@ public: /// Adds a connection for cross edges. void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { - ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); + ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ); } /// Sets each node's subtree ID to the representative ID and record @@ -1336,12 +1349,12 @@ public: LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'); } - for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) { - unsigned PredTree = SubtreeClasses[P.first->NodeNum]; - unsigned SuccTree = SubtreeClasses[P.second->NodeNum]; + for (const auto &[Pred, Succ] : ConnectionPairs) { + unsigned PredTree = SubtreeClasses[Pred->NodeNum]; + unsigned SuccTree = SubtreeClasses[Succ->NodeNum]; if (PredTree == SuccTree) continue; - unsigned Depth = P.first->getDepth(); + unsigned Depth = Pred->getDepth(); addConnection(PredTree, SuccTree, Depth); addConnection(SuccTree, PredTree, Depth); } @@ -1408,7 +1421,7 @@ public: bool isComplete() const { return DFSStack.empty(); } void follow(const SUnit *SU) { - DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); + DFSStack.emplace_back(SU, SU->Preds.begin()); } void advance() { ++DFSStack.back().second; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index a61a2b2728fa..209c6d81f602 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -147,7 +147,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[StageCycle]; - LLVM_FALLTHROUGH; + [[fallthrough]]; case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. freeUnits &= ~RequiredScoreboard[StageCycle]; @@ -198,7 +198,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[cycle + i]; - LLVM_FALLTHROUGH; + [[fallthrough]]; case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. 
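The ScoreboardHazardRecognizer hunks (one just above, one continuing below) replace the LLVM_FALLTHROUGH macro with the standard C++17 [[fallthrough]] attribute on the Required → Reserved case cascade. A minimal sketch of the same control flow, with made-up stage names purely for illustration:

#include <cstdio>

void applyStage(int Kind, unsigned &FreeUnits,
                unsigned ReservedMask, unsigned RequiredMask) {
  switch (Kind) {
  case 0: // "required" stage conflicts with both reserved and required units
    FreeUnits &= ~ReservedMask;
    [[fallthrough]];                 // deliberate: also apply the reserved rule
  case 1: // "reserved" stage conflicts only with required units
    FreeUnits &= ~RequiredMask;
    break;
  }
}

int main() {
  unsigned Free = 0xF;
  applyStage(0, Free, 0x1, 0x2);
  std::printf("free units: %#x\n", Free); // prints 0xc
  return 0;
}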
freeUnits &= ~RequiredScoreboard[cycle + i]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp index 011f55efce1d..5fd78eccf732 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -29,6 +28,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ScaledNumber.h" @@ -180,7 +180,7 @@ private: // consisting of instructions exclusively computed for producing the operands // of the source instruction. void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, - bool ForSinking = false); + Instruction *SI, bool ForSinking = false); // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); @@ -199,7 +199,7 @@ private: SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups); // Returns the latency cost of a given instruction. - Optional<uint64_t> computeInstCost(const Instruction *I); + std::optional<uint64_t> computeInstCost(const Instruction *I); // Returns the misprediction cost of a given select when converted to branch. Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost); @@ -242,6 +242,10 @@ bool SelectOptimize::runOnFunction(Function &F) { return false; TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + + if (!TTI->enableSelectOptimize()) + return false; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); @@ -375,13 +379,13 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // false operands. if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) { std::stack<Instruction *> TrueSlice; - getExclBackwardsSlice(TI, TrueSlice, true); + getExclBackwardsSlice(TI, TrueSlice, SI, true); maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); TrueSlices.push_back(TrueSlice); } if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) { std::stack<Instruction *> FalseSlice; - getExclBackwardsSlice(FI, FalseSlice, true); + getExclBackwardsSlice(FI, FalseSlice, SI, true); maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); FalseSlices.push_back(FalseSlice); } @@ -514,12 +518,27 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { } } +static bool isSpecialSelect(SelectInst *SI) { + using namespace llvm::PatternMatch; + + // If the select is a logical-and/logical-or then it is better treated as a + // and/or by the backend. 
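The isSpecialSelect helper being added here (its matcher continues just below) skips selects that are really logical and/or in disguise, since the backend handles those better as plain and/or. The equivalence it relies on is a two-line truth table: select %c, %b, false behaves like c && b, and select %c, true, %b behaves like c || b. A sketch:

#include <cassert>

bool sel(bool C, bool T, bool F) { return C ? T : F; }

int main() {
  for (int C = 0; C <= 1; ++C)
    for (int B = 0; B <= 1; ++B) {
      assert(sel(C, B, false) == (C && B)); // logical-and form of select
      assert(sel(C, true, B) == (C || B));  // logical-or form of select
    }
  return 0;
}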
+ if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), + m_LogicalOr(m_Value(), m_Value())))) + return true; + + return false; +} + void SelectOptimize::collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups) { BasicBlock::iterator BBIt = BB.begin(); while (BBIt != BB.end()) { Instruction *I = &*BBIt++; if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + if (isSpecialSelect(SI)) + continue; + SelectGroup SIGroup; SIGroup.push_back(SI); while (BBIt != BB.end()) { @@ -554,6 +573,12 @@ void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, } } +static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, + DiagnosticInfoOptimizationBase &Rem) { + LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n"); + ORE->emit(Rem); +} + void SelectOptimize::findProfitableSIGroupsInnerLoops( const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { NumSelectOptAnalyzed += SIGroups.size(); @@ -588,7 +613,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( OR << "Profitable to convert to branch (loop analysis). BranchCost=" << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() << ". "; - ORE->emit(OR); + EmitAndPrintRemark(ORE, OR); ++NumSelectConvertedLoop; ProfSIGroups.push_back(ASI); } else { @@ -596,7 +621,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( ORmiss << "Select is more profitable (loop analysis). BranchCost=" << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() << ". "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); } } } @@ -604,6 +629,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( bool SelectOptimize::isConvertToBranchProfitableBase( const SmallVector<SelectInst *, 2> &ASI) { SelectInst *SI = ASI.front(); + LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n"); OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); @@ -611,7 +637,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (PSI->isColdBlock(SI->getParent(), BFI.get())) { ++NumSelectColdBB; ORmiss << "Not converted to branch because of cold basic block. "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); return false; } @@ -619,7 +645,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (SI->getMetadata(LLVMContext::MD_unpredictable)) { ++NumSelectUnPred; ORmiss << "Not converted to branch because of unpredictable branch. "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); return false; } @@ -628,7 +654,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { ++NumSelectConvertedHighPred; OR << "Converted to branch because of highly predictable branch. 
"; - ORE->emit(OR); + EmitAndPrintRemark(ORE, OR); return true; } @@ -637,12 +663,12 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (hasExpensiveColdOperand(ASI)) { ++NumSelectConvertedExpColdOperand; OR << "Converted to branch because of expensive cold operand."; - ORE->emit(OR); + EmitAndPrintRemark(ORE, OR); return true; } ORmiss << "Not profitable to convert to branch (base heuristic)."; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); return false; } @@ -655,7 +681,7 @@ bool SelectOptimize::hasExpensiveColdOperand( const SmallVector<SelectInst *, 2> &ASI) { bool ColdOperand = false; uint64_t TrueWeight, FalseWeight, TotalWeight; - if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) { uint64_t MinWeight = std::min(TrueWeight, FalseWeight); TotalWeight = TrueWeight + FalseWeight; // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? @@ -664,7 +690,7 @@ bool SelectOptimize::hasExpensiveColdOperand( OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); ORmiss << "Profile data available but missing branch-weights metadata for " "select instruction. "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); } if (!ColdOperand) return false; @@ -682,7 +708,7 @@ bool SelectOptimize::hasExpensiveColdOperand( } if (ColdI) { std::stack<Instruction *> ColdSlice; - getExclBackwardsSlice(ColdI, ColdSlice); + getExclBackwardsSlice(ColdI, ColdSlice, SI); InstructionCost SliceCost = 0; while (!ColdSlice.empty()) { SliceCost += TTI->getInstructionCost(ColdSlice.top(), @@ -703,6 +729,22 @@ bool SelectOptimize::hasExpensiveColdOperand( return false; } +// Check if it is safe to move LoadI next to the SI. +// Conservatively assume it is safe only if there is no instruction +// modifying memory in-between the load and the select instruction. +static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) { + // Assume loads from different basic blocks are unsafe to move. + if (LoadI->getParent() != SI->getParent()) + return false; + auto It = LoadI->getIterator(); + while (&*It != SI) { + if (It->mayWriteToMemory()) + return false; + It++; + } + return true; +} + // For a given source instruction, collect its backwards dependence slice // consisting of instructions exclusively computed for the purpose of producing // the operands of the source instruction. As an approximation @@ -711,7 +753,7 @@ bool SelectOptimize::hasExpensiveColdOperand( // form an one-use chain that leads to the source instruction. void SelectOptimize::getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, - bool ForSinking) { + Instruction *SI, bool ForSinking) { SmallPtrSet<Instruction *, 2> Visited; std::queue<Instruction *> Worklist; Worklist.push(I); @@ -733,6 +775,13 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, isa<SelectInst>(II) || isa<PHINode>(II))) continue; + // Avoid sinking loads in order not to skip state-modifying instructions, + // that may alias with the loaded address. + // Only allow sinking of loads within the same basic block that are + // conservatively proven to be safe. + if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) + continue; + // Avoid considering instructions with less frequency than the source // instruction (i.e., avoid colder code regions of the dependence slice). 
if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) @@ -750,7 +799,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { uint64_t TrueWeight, FalseWeight; - if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { uint64_t Max = std::max(TrueWeight, FalseWeight); uint64_t Sum = TrueWeight + FalseWeight; if (Sum != 0) { @@ -777,7 +826,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { ORmissL << "No select conversion in the loop due to no reduction of loop's " "critical path. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } @@ -794,7 +843,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, "loop's critical path. Gain=" << Gain[1].toString() << ", RelativeGain=" << RelativeGain.toString() << "%. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } @@ -810,7 +859,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, ORmissL << "No select conversion in the loop due to small gradient gain. " "GradientGain=" << GradientGain.toString() << "%. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } } @@ -818,7 +867,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, else if (Gain[1] < Gain[0]) { ORmissL << "No select conversion in the loop due to negative gradient gain. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } @@ -834,6 +883,8 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, bool SelectOptimize::computeLoopCosts( const Loop *L, const SelectGroups &SIGroups, DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { + LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " + << L->getHeader()->getName() << "\n"); const auto &SIset = getSIset(SIGroups); // Compute instruction and loop-critical-path costs across two iterations for // both predicated and non-predicated version. @@ -867,11 +918,11 @@ bool SelectOptimize::computeLoopCosts( ORmissL << "Invalid instruction cost preventing analysis and " "optimization of the inner-most loop containing this " "instruction. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } - IPredCost += Scaled64::get(ILatency.value()); - INonPredCost += Scaled64::get(ILatency.value()); + IPredCost += Scaled64::get(*ILatency); + INonPredCost += Scaled64::get(*ILatency); // For a select that can be converted to branch, // compute its cost as a branch (non-predicated cost). 
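computeLoopCosts (starting in this hunk) models each candidate select two ways: a predicated cost, and a branch cost built from the formulas spelled out in the comments just below, PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb and MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate. The pass does this with ScaledNumber and profile-derived weights; the arithmetic itself can be sketched with plain doubles and illustrative numbers:

#include <algorithm>
#include <cstdio>

int main() {
  // Illustrative values only; the real pass derives these from the target
  // schedule model and branch-weight metadata.
  double TrueOpCost = 6.0, FalseOpCost = 2.0;
  double TrueProb = 0.9, FalseProb = 0.1;
  double CondCost = 1.0, MispredictPenalty = 14.0, MispredictRate = 0.25;

  double PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb;
  double MispredictCost = std::max(MispredictPenalty, CondCost) * MispredictRate;
  double NonPredCost = PredictedPathCost + MispredictCost; // cost if converted to a branch

  std::printf("branch-form cost = %.2f\n", NonPredCost); // 5.60 + 3.50 = 9.10
  return 0;
}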
@@ -880,7 +931,7 @@ bool SelectOptimize::computeLoopCosts( // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate if (SIset.contains(&I)) { - auto SI = dyn_cast<SelectInst>(&I); + auto SI = cast<SelectInst>(&I); Scaled64 TrueOpCost = Scaled64::getZero(), FalseOpCost = Scaled64::getZero(); @@ -901,12 +952,17 @@ bool SelectOptimize::computeLoopCosts( INonPredCost = PredictedPathCost + MispredictCost; } + LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/" + << INonPredCost << " for " << I << "\n"); InstCostMap[&I] = {IPredCost, INonPredCost}; MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); } } + LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1 + << " MaxCost = " << MaxCost.PredCost << " " + << MaxCost.NonPredCost << "\n"); } return true; } @@ -920,12 +976,12 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) { return SIset; } -Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { +std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { InstructionCost ICost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); if (auto OC = ICost.getValue()) - return Optional<uint64_t>(*OC); - return Optional<uint64_t>(None); + return std::optional<uint64_t>(*OC); + return std::nullopt; } ScaledNumber<uint64_t> @@ -959,7 +1015,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, const SelectInst *SI) { Scaled64 PredPathCost; uint64_t TrueWeight, FalseWeight; - if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { uint64_t SumWeight = TrueWeight + FalseWeight; if (SumWeight != 0) { PredPathCost = TrueCost * Scaled64::get(TrueWeight) + diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a7f9382478d4..0a3ebd73d272 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20,8 +20,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" @@ -69,9 +67,11 @@ #include <cstdint> #include <functional> #include <iterator> +#include <optional> #include <string> #include <tuple> #include <utility> +#include <variant> using namespace llvm; @@ -135,6 +135,11 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); +static cl::opt<bool> EnableVectorFCopySignExtendRound( + "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), + cl::desc( + "Enable merging extends and rounds into FCOPYSIGN on vector types")); + namespace { class DAGCombiner { @@ -246,8 +251,8 @@ namespace { for (MVT VT : MVT::all_valuetypes()) if (EVT(VT).isSimple() && VT != MVT::Other && TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); + VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue(); } void ConsiderForPruning(SDNode *N) { @@ -382,6 +387,10 
@@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, + ISD::CondCode CC); + /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. @@ -434,6 +443,7 @@ namespace { SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); + SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL); SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); @@ -494,6 +504,8 @@ namespace { SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); SDValue replaceStoreOfFPConstant(StoreSDNode *ST); + bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N); + SDValue visitSTORE(SDNode *N); SDValue visitLIFETIME_END(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); @@ -508,6 +520,8 @@ namespace { SDValue visitMSTORE(SDNode *N); SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); + SDValue visitVPGATHER(SDNode *N); + SDValue visitVPSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFP_TO_BF16(SDNode *N); @@ -551,6 +565,7 @@ namespace { SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL); SDValue foldSubToUSubSat(EVT DstVT, SDNode *N); + SDValue foldABSToABD(SDNode *N); SDValue unfoldMaskedMerge(SDNode *N); SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, @@ -567,6 +582,7 @@ namespace { SDValue CombineExtLoad(SDNode *N); SDValue CombineZExtLogicopShiftLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); @@ -602,6 +618,7 @@ namespace { SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); + SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecTruncToBitCast(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); @@ -1204,19 +1221,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG); dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n'); - // Replace all uses. If any nodes become isomorphic to other nodes and - // are deleted, make sure to remove them from our worklist. - WorklistRemover DeadNodes(*this); + // Replace all uses. DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. AddToWorklistWithUsers(TLO.New.getNode()); - // Finally, if the node is now dead, remove it from the graph. The node - // may not be dead if the replacement process recursively simplified to - // something else needing this node. - if (TLO.Old->use_empty()) - deleteAndRecombine(TLO.Old.getNode()); + // Finally, if the node is now dead, remove it from the graph. 
+ recursivelyDeleteUnusedNodes(TLO.Old.getNode()); } /// Check the specified integer node value to see if it can be simplified or if @@ -1263,11 +1275,12 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; Trunc.dump(&DAG); dbgs() << '\n'); - WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - deleteAndRecombine(Load); + AddToWorklist(Trunc.getNode()); + recursivelyDeleteUnusedNodes(Load); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { @@ -1522,13 +1535,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; Result.dump(&DAG); dbgs() << '\n'); - WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - deleteAndRecombine(N); + AddToWorklist(Result.getNode()); + recursivelyDeleteUnusedNodes(N); return true; } + return false; } @@ -1746,7 +1761,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::AssertAlign: return visitAssertAlign(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N); + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1964,7 +1980,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Changed = true; break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: // Only add if it isn't already in the list. @@ -2187,54 +2203,29 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); // TODO: Should this apply to scalar select too? - if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT) + if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse()) return SDValue(); + // We can't hoist div/rem because of immediate UB (not speculatable). unsigned Opcode = N->getOpcode(); + if (!DAG.isSafeToSpeculativelyExecute(Opcode)) + return SDValue(); + EVT VT = N->getValueType(0); SDValue Cond = N1.getOperand(0); SDValue TVal = N1.getOperand(1); SDValue FVal = N1.getOperand(2); - // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity(). - // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()". - // TODO: With fast-math (NSZ), allow the opposite-sign form of zero? - auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) { - if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) { - switch (Opcode) { - case ISD::FADD: // X + -0.0 --> X - return C->isZero() && C->isNegative(); - case ISD::FSUB: // X - 0.0 --> X - return C->isZero() && !C->isNegative(); - case ISD::FMUL: // X * 1.0 --> X - case ISD::FDIV: // X / 1.0 --> X - return C->isExactlyValue(1.0); - } - } - if (ConstantSDNode *C = isConstOrConstSplat(V)) { - switch (Opcode) { - case ISD::ADD: // X + 0 --> X - case ISD::SUB: // X - 0 --> X - case ISD::SHL: // X << 0 --> X - case ISD::SRA: // X s>> 0 --> X - case ISD::SRL: // X u>> 0 --> X - return C->isZero(); - case ISD::MUL: // X * 1 --> X - return C->isOne(); - } - } - return false; - }; - // This transform increases uses of N0, so freeze it to be safe. 
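The rewritten foldSelectWithIdentityConstant (continued below) now asks isNeutralConstant whether the select arm is the identity element of the binary op, instead of open-coding the table of identities. The underlying algebra is easy to check on scalars, e.g. 0 for add and 1 for mul; the DAG version applies it to vselect with splat constants and freezes N0 first:

#include <cassert>

int sel(bool C, int T, int F) { return C ? T : F; }

int main() {
  for (int X = -2; X <= 2; ++X)
    for (int F = -2; F <= 2; ++F)
      for (int C = 0; C <= 1; ++C) {
        // binop X, (select C, identity, F)  ==  select C, X, (binop X, F)
        assert(X + sel(C, 0, F) == sel(C, X, X + F)); // 0 is the identity of add
        assert(X * sel(C, 1, F) == sel(C, X, X * F)); // 1 is the identity of mul
      }
  return 0;
}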
// binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal) - if (isIdentityConstantForOpcode(Opcode, TVal)) { + unsigned OpNo = ShouldCommuteOperands ? 0 : 1; + if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags()); return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); } // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 - if (isIdentityConstantForOpcode(Opcode, FVal)) { + if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags()); return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0); @@ -2289,8 +2280,8 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // or X, (select Cond, -1, 0) --> select Cond, -1, X bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && - (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && - (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF)); + ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) || + (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT))); SDValue CBO = BO->getOperand(SelOpNo ^ 1); if (!CanFoldNonConst && @@ -2298,23 +2289,41 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !DAG.isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); - // We have a select-of-constants followed by a binary operator with a - // constant. Eliminate the binop by pulling the constant math into the select. - // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO SDLoc DL(Sel); - SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) - : DAG.getNode(BinOpcode, DL, VT, CT, CBO); - if (!CanFoldNonConst && !NewCT.isUndef() && - !isConstantOrConstantVector(NewCT, true) && - !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) - return SDValue(); + SDValue NewCT, NewCF; - SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) - : DAG.getNode(BinOpcode, DL, VT, CF, CBO); - if (!CanFoldNonConst && !NewCF.isUndef() && - !isConstantOrConstantVector(NewCF, true) && - !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) - return SDValue(); + if (CanFoldNonConst) { + // If CBO is an opaque constant, we can't rely on getNode to constant fold. + if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) || + (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT))) + NewCT = CT; + else + NewCT = CBO; + + if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) || + (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF))) + NewCF = CF; + else + NewCF = CBO; + } else { + // We have a select-of-constants followed by a binary operator with a + // constant. Eliminate the binop by pulling the constant math into the + // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT + + // CBO, CF + CBO + NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) + : DAG.getNode(BinOpcode, DL, VT, CT, CBO); + if (!CanFoldNonConst && !NewCT.isUndef() && + !isConstantOrConstantVector(NewCT, true) && + !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) + return SDValue(); + + NewCF = SelOpNo ? 
DAG.getNode(BinOpcode, DL, VT, CBO, CF) + : DAG.getNode(BinOpcode, DL, VT, CF, CBO); + if (!CanFoldNonConst && !NewCF.isUndef() && + !isConstantOrConstantVector(NewCF, true) && + !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) + return SDValue(); + } SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); SelectOp->setFlags(BO->getFlags()); @@ -2668,9 +2677,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2) - if ((N0.getOpcode() == ISD::ADD) && - (N0.getOperand(1).getOpcode() == ISD::VSCALE) && - (N1.getOpcode() == ISD::VSCALE)) { + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::VSCALE && + N1.getOpcode() == ISD::VSCALE) { const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &VS1 = N1->getConstantOperandAPInt(0); SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1); @@ -2687,9 +2696,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2) - if ((N0.getOpcode() == ISD::ADD) && - (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) && - (N1.getOpcode() == ISD::STEP_VECTOR)) { + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR && + N1.getOpcode() == ISD::STEP_VECTOR) { const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &SV1 = N1->getConstantOperandAPInt(0); APInt NewStep = SV0 + SV1; @@ -2789,16 +2798,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { /// the opcode and bypass the mask operation. static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL) { + if (N1.getOpcode() == ISD::ZERO_EXTEND) + N1 = N1.getOperand(0); + if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) return SDValue(); EVT VT = N0.getValueType(); - if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) + SDValue N10 = N1.getOperand(0); + if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE) + N10 = N10.getOperand(0); + + if (N10.getValueType() != VT) + return SDValue(); + + if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits()) return SDValue(); // add N0, (and (AssertSext X, i1), 1) --> sub N0, X // sub N0, (and (AssertSext X, i1), 1) --> add N0, X - return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10); } /// Helper for doing combines based on N0 and N1 being added to each other. @@ -3079,6 +3098,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N)) return Combined; + // We want to avoid useless duplication. + // TODO: This is done automatically for binary operations. As ADDCARRY is + // not a binary operation, this is not really possible to leverage this + // existing mechanism for it. However, if more operations require the same + // deduplication logic, then it may be worth generalize. + SDValue Ops[] = {N1, N0, CarryIn}; + SDNode *CSENode = + DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags()); + if (CSENode) + return SDValue(CSENode, 0); + return SDValue(); } @@ -3110,7 +3140,7 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { * (addcarry X, 0, (addcarry A, B, Z):Carry) * * The end result is usually an increase in operation required, but because the - * carry is now linearized, other tranforms can kick in and optimize the DAG. 
+ * carry is now linearized, other transforms can kick in and optimize the DAG. * * Patterns typically look something like * (uaddo A, B) @@ -3492,11 +3522,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); if (N1S && N1S.getOpcode() == ISD::SUB && - isNullConstant(N1S.getOperand(0))) { - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, N1S.getOperand(1)); - return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1)); - } + isNullConstant(N1S.getOperand(0))) + return DAG.getSplat(VT, DL, N1S.getOperand(1)); } } @@ -3625,7 +3652,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return V; // (x - y) - 1 -> add (xor y, -1), x - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) { + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); @@ -3642,26 +3669,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // Hoist one-use addition by non-opaque constant: // (x + C) - y -> (x - y) + C - if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD && + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); } // y - (x + C) -> (y - x) - C - if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD && + if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() && isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); } // (x - C) - y -> (x - y) - C // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. 
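The MatchSubMaxMin lambda added to visitSUB above turns max(a,b) - min(a,b) into the absolute-difference nodes ISD::ABDS / ISD::ABDU when the target supports them. The identity it relies on holds for both signed and unsigned values (the signed loop below stays small to avoid overflow in plain C++):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B)
      assert(std::max(A, B) - std::min(A, B) == std::abs(A - B)); // signed flavour

  uint8_t UA = 250, UB = 7;
  assert(std::max(UA, UB) - std::min(UA, UB) == 243);             // unsigned flavour
  return 0;
}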
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); } // (C - x) - y -> C - (x + y) - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); @@ -3716,7 +3743,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) - if (N1.getOpcode() == ISD::VSCALE) { + if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) { const APInt &IntVal = N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); } @@ -3749,6 +3776,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, N1, N0); } + // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry) + if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) && + N0.getResNo() == 0 && N0.hasOneUse()) + return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(), + N0.getOperand(0), N1, N0.getOperand(2)); + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) if (SDValue Carry = getAsCarry(TLI, N0)) { @@ -3772,6 +3805,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // max(a,b) - min(a,b) --> abd(a,b) + auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) { + if (N0.getOpcode() != Max || N1.getOpcode() != Min) + return SDValue(); + if ((N0.getOperand(0) != N1.getOperand(0) || + N0.getOperand(1) != N1.getOperand(1)) && + (N0.getOperand(0) != N1.getOperand(1) || + N0.getOperand(1) != N1.getOperand(0))) + return SDValue(); + if (!TLI.isOperationLegalOrCustom(Abd, VT)) + return SDValue(); + return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1)); + }; + if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS)) + return R; + if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU)) + return R; + return SDValue(); } @@ -3996,8 +4047,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnes()) - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), N0); + return DAG.getNegative(N0, DL, VT); // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && @@ -4021,6 +4071,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } + // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the + // hi result is in use in case we hit this mid-legalization. + for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) { + if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) { + SDVTList LoHiVT = DAG.getVTList(VT, VT); + // TODO: Can we match commutable operands with getNodeIfExists? + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + } + } + // Try to transform: // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub. 
// mul x, (2^N + 1) --> add (shl x, N), x @@ -4064,7 +4129,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(TZeros, DL, VT))) : DAG.getNode(MathOp, DL, VT, Shl, N0); if (ConstValue1.isNegative()) - R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); + R = DAG.getNegative(R, DL, VT); return R; } } @@ -4108,21 +4173,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). - if (N0.getOpcode() == ISD::VSCALE) - if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - const APInt &C1 = NC1->getAPIntValue(); - return DAG.getVScale(DL, VT, C0 * C1); - } + ConstantSDNode *NC1 = isConstOrConstSplat(N1); + if (N0.getOpcode() == ISD::VSCALE && NC1) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + const APInt &C1 = NC1->getAPIntValue(); + return DAG.getVScale(DL, VT, C0 * C1); + } // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). APInt MulVal; - if (N0.getOpcode() == ISD::STEP_VECTOR) - if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - APInt NewStep = C0 * MulVal; - return DAG.getStepVector(DL, VT, NewStep); - } + if (N0.getOpcode() == ISD::STEP_VECTOR && + ISD::isConstantSplatVector(N1.getNode(), MulVal)) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + APInt NewStep = C0 * MulVal; + return DAG.getStepVector(DL, VT, NewStep); + } // Fold ((mul x, 0/undef) -> 0, // (mul x, 1) -> x) -> x) @@ -4318,7 +4383,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // fold (sdiv X, -1) -> 0-X ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + return DAG.getNegative(N0, DL, VT); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) if (N1C && N1C->getAPIntValue().isMinSignedValue()) @@ -4465,10 +4530,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // fold (udiv X, -1) -> select(X == -1, 1, 0) ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N1C && N1C->isAllOnes()) + if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) { return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); + } if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -4571,7 +4637,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) { // fold (urem X, -1) -> select(FX == -1, 0, FX) // Freeze the numerator to avoid a miscompile with an undefined value. 
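The multiply-by-(2^N ± 1) decomposition described at the top of this hunk is plain integer algebra and is easy to spot-check; with unsigned arithmetic it holds for every input because wraparound behaves consistently on both sides:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 100000; ++X) {
    assert(X * 9 == (X << 3) + X); // mul x, (2^3 + 1) --> add (shl x, 3), x
    assert(X * 7 == (X << 3) - X); // mul x, (2^3 - 1) --> sub (shl x, 3), x
  }
  return 0;
}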
- if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) { + if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) && + CCVT.isVector() == VT.isVector()) { SDValue F0 = DAG.getFreeze(N0); SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ); return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0); @@ -5328,6 +5395,21 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { return DAG.getNode(HandOpcode, DL, VT, Logic); } + // For funnel shifts FSHL/FSHR: + // logic_op (OP x, x1, s), (OP y, y1, s) --> + // --> OP (logic_op x, y), (logic_op, x1, y1), s + if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) && + N0.getOperand(2) == N1.getOperand(2)) { + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + SDValue X1 = N0.getOperand(1); + SDValue Y1 = N1.getOperand(1); + SDValue S = N0.getOperand(2); + SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y); + SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1); + return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S); + } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by @@ -6139,6 +6221,43 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); } +/// Given a tree of logic operations with shape like +/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) +/// try to match and fold shift operations with the same shift amount. +/// For example: +/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) --> +/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W) +static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, + SDValue RightHand, SelectionDAG &DAG) { + unsigned LogicOpcode = N->getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR)); + if (LeftHand.getOpcode() != LogicOpcode || + RightHand.getOpcode() != LogicOpcode) + return SDValue(); + if (!LeftHand.hasOneUse() || !RightHand.hasOneUse()) + return SDValue(); + + // Try to match one of following patterns: + // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) + // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y)) + // Note that foldLogicOfShifts will handle commuted versions of the left hand + // itself. 
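The funnel-shift case added to hoistLogicOpWithSameOpcodeHands above hoists a shared FSHL/FSHR through a bitwise op: logic_op (fsh x, x1, s), (fsh y, y1, s) --> fsh (logic_op x, y), (logic_op x1, y1), s. This works because the two halves of a funnel shift occupy disjoint bit ranges, so and/or/xor distribute through it. A self-contained check with a minimal 32-bit fshl helper (the helper is a local sketch, not LLVM's implementation):

#include <cassert>
#include <cstdint>

// fshl(Hi, Lo, S): top 32 bits of the 64-bit concatenation Hi:Lo shifted left by S % 32.
uint32_t fshl(uint32_t Hi, uint32_t Lo, unsigned S) {
  S %= 32;
  uint64_t Concat = (uint64_t(Hi) << 32) | Lo;
  return uint32_t((Concat << S) >> 32);
}

int main() {
  uint32_t X = 0x12345678, X1 = 0x9abcdef0, Y = 0x0f0f0f0f, Y1 = 0xcafebabe;
  for (unsigned S = 0; S < 32; ++S) {
    assert((fshl(X, X1, S) & fshl(Y, Y1, S)) == fshl(X & Y, X1 & Y1, S));
    assert((fshl(X, X1, S) | fshl(Y, Y1, S)) == fshl(X | Y, X1 | Y1, S));
    assert((fshl(X, X1, S) ^ fshl(Y, Y1, S)) == fshl(X ^ Y, X1 ^ Y1, S));
  }
  return 0;
}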
+ SDValue CombinedShifts, W; + SDValue R0 = RightHand.getOperand(0); + SDValue R1 = RightHand.getOperand(1); + if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG))) + W = R1; + else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG))) + W = R0; + else + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6175,8 +6294,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0); ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true); - if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() && - Splat && N1.hasOneUse()) { + if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat && + N1.hasOneUse()) { EVT LoadVT = MLoad->getMemoryVT(); EVT ExtVT = VT; if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { @@ -6186,11 +6305,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) { uint64_t ElementSize = LoadVT.getVectorElementType().getScalarSizeInBits(); if (Splat->getAPIntValue().isMask(ElementSize)) { - return DAG.getMaskedLoad( + auto NewLoad = DAG.getMaskedLoad( ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD, MLoad->isExpandingLoad()); + bool LoadHasOtherUsers = !N0.hasOneUse(); + CombineTo(N, NewLoad); + if (LoadHasOtherUsers) + CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1)); + return SDValue(N, 0); } } } @@ -6213,14 +6337,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) return RAND; - // Try to convert a constant mask AND into a shuffle clear mask. - if (VT.isVector()) - if (SDValue Shuffle = XformToShuffleWithZero(N)) - return Shuffle; - - if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) - return Combined; - // fold (and (or x, C), D) -> D if (C & D) == D auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); @@ -6228,23 +6344,32 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0.getOpcode() == ISD::OR && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); - if (DAG.MaskedValueIsZero(N0Op0, Mask)) { - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), - N0.getValueType(), N0Op0); - - // Replace uses of the AND with uses of the Zero extend node. - CombineTo(N, Zext); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); + } - // We actually want to replace all uses of the any_extend with the - // zero_extend, to avoid duplicating things. This will later cause this - // AND to be folded. - CombineTo(N0.getNode(), Zext); - return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2))) + if (ISD::isExtOpcode(N0.getOpcode())) { + unsigned ExtOpc = N0.getOpcode(); + SDValue N0Op0 = N0.getOperand(0); + if (N0Op0.getOpcode() == ISD::AND && + (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) && + DAG.isConstantIntBuildVectorOrConstantInt(N1) && + DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) && + N0->hasOneUse() && N0Op0->hasOneUse()) { + SDLoc DL(N); + SDValue NewMask = + DAG.getNode(ISD::AND, DL, VT, N1, + DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1))); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)), + NewMask); } } @@ -6353,6 +6478,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // Try to convert a constant mask AND into a shuffle clear mask. + if (VT.isVector()) + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; + + if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) + return Combined; + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C && ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { SDValue Ext = N0.getOperand(0); @@ -6512,6 +6645,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = foldAndToUsubsat(N, DAG)) return V; + // Postpone until legalization completed to avoid interference with bswap + // folding + if (LegalOperations || VT.isVector()) + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; + return SDValue(); } @@ -6911,6 +7050,10 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); + // fold or (and x, y), x --> x + if (N00 == N1 || N01 == N1) + return N1; + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) // TODO: Set AllowUndefs = true. if (getBitwiseNotOperand(N01, N00, @@ -6923,6 +7066,24 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } + if (N0.getOpcode() == ISD::XOR) { + // fold or (xor x, y), x --> or x, y + // or (xor x, y), (x and/or y) --> or x, y + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (N00 == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); + if (N01 == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); + + if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01); + } + } + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; @@ -7112,10 +7273,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue Combined = visitADDLike(N)) return Combined; + // Postpone until legalization completed to avoid interference with bswap + // folding + if (LegalOperations || VT.isVector()) + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; + return SDValue(); } -static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { +static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, + SDValue &Mask) { if (Op.getOpcode() == ISD::AND && DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); @@ -7125,7 +7293,7 @@ static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { } /// Match "(X shl/srl V1) & V2" where V2 may not be present. 
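The new (and (ext (and V, c1)), c2) fold near the top of this chunk relies on sign- and zero-extension distributing over bitwise AND, which lets both masks be merged in the wider type. For the zero-extension case this is easy to verify exhaustively on 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 0x3c;
  const uint32_t C2 = 0x0000ff0f;
  for (unsigned V = 0; V < 256; ++V) {
    uint32_t Narrow = uint32_t(uint8_t(uint8_t(V) & C1)) & C2; // and (zext (and V, c1)), c2
    uint32_t Merged = uint32_t(V) & (uint32_t(C1) & C2);       // and (zext V), (and (zext c1), c2)
    assert(Narrow == Merged);
  }
  return 0;
}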
-static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, +static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask) { Op = stripConstantMask(DAG, Op, Mask); if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { @@ -7612,7 +7780,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { }; // TODO: Support pre-legalization funnel-shift by constant. - bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); + bool IsRotate = LHSShiftArg == RHSShiftArg; if (!IsRotate && !(HasFSHL || HasFSHR)) { if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() && ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { @@ -7740,87 +7908,135 @@ struct ByteProvider { // ByteOffset is the offset of the byte in the value produced by the load. LoadSDNode *Load = nullptr; unsigned ByteOffset = 0; + unsigned VectorOffset = 0; ByteProvider() = default; - static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { - return ByteProvider(Load, ByteOffset); + static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset, + unsigned VectorOffset) { + return ByteProvider(Load, ByteOffset, VectorOffset); } - static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } + static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); } bool isConstantZero() const { return !Load; } bool isMemory() const { return Load; } bool operator==(const ByteProvider &Other) const { - return Other.Load == Load && Other.ByteOffset == ByteOffset; + return Other.Load == Load && Other.ByteOffset == ByteOffset && + Other.VectorOffset == VectorOffset; } private: - ByteProvider(LoadSDNode *Load, unsigned ByteOffset) - : Load(Load), ByteOffset(ByteOffset) {} + ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset) + : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {} }; } // end anonymous namespace /// Recursively traverses the expression calculating the origin of the requested -/// byte of the given value. Returns None if the provider can't be calculated. +/// byte of the given value. Returns std::nullopt if the provider can't be +/// calculated. +/// +/// For all the values except the root of the expression, we verify that the +/// value has exactly one use and if not then return std::nullopt. This way if +/// the origin of the byte is returned it's guaranteed that the values which +/// contribute to the byte are not used outside of this expression. + +/// However, there is a special case when dealing with vector loads -- we allow +/// more than one use if the load is a vector type. Since the values that +/// contribute to the byte ultimately come from the ExtractVectorElements of the +/// Load, we don't care if the Load has uses other than ExtractVectorElements, +/// because those operations are independent from the pattern to be combined. +/// For vector loads, we simply care that the ByteProviders are adjacent +/// positions of the same vector, and their index matches the byte that is being +/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex +/// is the index used in an ExtractVectorElement, and \p StartingIndex is the +/// byte position we are trying to provide for the LoadCombine. If these do +/// not match, then we can not combine the vector loads. \p Index uses the +/// byte position we are trying to provide for and is matched against the +/// shl and load size. 
The \p Index algorithm ensures the requested byte is +/// provided for by the pattern, and the pattern does not over provide bytes. /// -/// For all the values except the root of the expression verifies that the value -/// has exactly one use and if it's not true return None. This way if the origin -/// of the byte is returned it's guaranteed that the values which contribute to -/// the byte are not used outside of this expression. /// -/// Because the parts of the expression are not allowed to have more than one -/// use this function iterates over trees, not DAGs. So it never visits the same -/// node more than once. -static const Optional<ByteProvider> +/// The supported LoadCombine pattern for vector loads is as follows +/// or +/// / \ +/// or shl +/// / \ | +/// or shl zext +/// / \ | | +/// shl zext zext EVE* +/// | | | | +/// zext EVE* EVE* LOAD +/// | | | +/// EVE* LOAD LOAD +/// | +/// LOAD +/// +/// *ExtractVectorElement +static const std::optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, - bool Root = false) { + std::optional<uint64_t> VectorIndex, + unsigned StartingIndex = 0) { + // Typical i64 by i8 pattern requires recursion up to 8 calls depth if (Depth == 10) - return None; + return std::nullopt; + + // Only allow multiple uses if the instruction is a vector load (in which + // case we will use the load for every ExtractVectorElement) + if (Depth && !Op.hasOneUse() && + (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector())) + return std::nullopt; - if (!Root && !Op.hasOneUse()) - return None; + // Fail to combine if we have encountered anything but a LOAD after handling + // an ExtractVectorElement. + if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value()) + return std::nullopt; - assert(Op.getValueType().isScalarInteger() && "can't handle other types"); unsigned BitWidth = Op.getValueSizeInBits(); if (BitWidth % 8 != 0) - return None; + return std::nullopt; unsigned ByteWidth = BitWidth / 8; assert(Index < ByteWidth && "invalid index requested"); (void) ByteWidth; switch (Op.getOpcode()) { case ISD::OR: { - auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1); + auto LHS = + calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex); if (!LHS) - return None; - auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1); + return std::nullopt; + auto RHS = + calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex); if (!RHS) - return None; + return std::nullopt; if (LHS->isConstantZero()) return RHS; if (RHS->isConstantZero()) return LHS; - return None; + return std::nullopt; } case ISD::SHL: { auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1)); if (!ShiftOp) - return None; + return std::nullopt; uint64_t BitShift = ShiftOp->getZExtValue(); + if (BitShift % 8 != 0) - return None; + return std::nullopt; uint64_t ByteShift = BitShift / 8; + // If we are shifting by an amount greater than the index we are trying to + // provide, then do not provide anything. Otherwise, subtract the index by + // the amount we shifted by. return Index < ByteShift ? 
ByteProvider::getConstantZero() : calculateByteProvider(Op->getOperand(0), Index - ByteShift, - Depth + 1); + Depth + 1, VectorIndex, Index); } case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -7828,37 +8044,70 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, SDValue NarrowOp = Op->getOperand(0); unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; if (Index >= NarrowByteWidth) return Op.getOpcode() == ISD::ZERO_EXTEND - ? Optional<ByteProvider>(ByteProvider::getConstantZero()) - : None; - return calculateByteProvider(NarrowOp, Index, Depth + 1); + ? std::optional<ByteProvider>(ByteProvider::getConstantZero()) + : std::nullopt; + return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex, + StartingIndex); } case ISD::BSWAP: return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1, - Depth + 1); + Depth + 1, VectorIndex, StartingIndex); + case ISD::EXTRACT_VECTOR_ELT: { + auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1)); + if (!OffsetOp) + return std::nullopt; + + VectorIndex = OffsetOp->getZExtValue(); + + SDValue NarrowOp = Op->getOperand(0); + unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); + if (NarrowBitWidth % 8 != 0) + return std::nullopt; + uint64_t NarrowByteWidth = NarrowBitWidth / 8; + + // Check to see if the position of the element in the vector corresponds + // with the byte we are trying to provide for. In the case of a vector of + // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases, + // the element will provide a range of bytes. For example, if we have a + // vector of i16s, each element provides two bytes (V[1] provides byte 2 and + // 3). + if (*VectorIndex * NarrowByteWidth > StartingIndex) + return std::nullopt; + if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex) + return std::nullopt; + + return calculateByteProvider(Op->getOperand(0), Index, Depth + 1, + VectorIndex, StartingIndex); + } case ISD::LOAD: { auto L = cast<LoadSDNode>(Op.getNode()); if (!L->isSimple() || L->isIndexed()) - return None; + return std::nullopt; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; + // If the width of the load does not reach byte we are trying to provide for + // and it is not a ZEXTLOAD, then the load does not provide for the byte in + // question if (Index >= NarrowByteWidth) return L->getExtensionType() == ISD::ZEXTLOAD - ? Optional<ByteProvider>(ByteProvider::getConstantZero()) - : None; - return ByteProvider::getMemory(L, Index); + ? std::optional<ByteProvider>(ByteProvider::getConstantZero()) + : std::nullopt; + + unsigned BPVectorIndex = VectorIndex.value_or(0U); + return ByteProvider::getMemory(L, Index, BPVectorIndex); } } - return None; + return std::nullopt; } static unsigned littleEndianByteAt(unsigned BW, unsigned i) { @@ -7871,13 +8120,13 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) { // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little -// endian, and None if match failed. -static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, - int64_t FirstOffset) { +// endian, and std::nullopt if match failed. 
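
The SHL case above returns a constant-zero provider when Index < ByteShift and otherwise recurses with Index - ByteShift. The rule it encodes is that byte i of (x << 8*k) is zero for i < k and is byte i-k of x otherwise. A standalone spot-check of that rule (byteAt is again a made-up helper):

#include <cassert>
#include <cstdint>

static uint8_t byteAt(uint32_t v, unsigned i) { return uint8_t(v >> (8 * i)); }

int main() {
  const uint32_t samples[] = {0u, 0x01020304u, 0xdeadbeefu, 0xffffffffu, 0x00ff00aau};
  for (uint32_t x : samples) {
    for (unsigned k = 0; k < 4; ++k) {        // byte shift amount (BitShift / 8)
      uint32_t shifted = x << (8 * k);
      for (unsigned i = 0; i < 4; ++i) {      // byte index being provided
        if (i < k)
          assert(byteAt(shifted, i) == 0);                // constant-zero provider
        else
          assert(byteAt(shifted, i) == byteAt(x, i - k)); // recurse with Index - ByteShift
      }
    }
  }
  return 0;
}
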
+static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, + int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. unsigned Width = ByteOffsets.size(); if (Width < 2) - return None; + return std::nullopt; bool BigEndian = true, LittleEndian = true; for (unsigned i = 0; i < Width; i++) { @@ -7885,7 +8134,7 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i); BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i); if (!BigEndian && !LittleEndian) - return None; + return std::nullopt; } assert((BigEndian != LittleEndian) && "It should be either big endian or" @@ -7948,9 +8197,13 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { while (auto *Store = dyn_cast<StoreSDNode>(Chain)) { // All stores must be the same size to ensure that we are writing all of the // bytes in the wide value. + // This store should have exactly one use as a chain operand for another + // store in the merging set. If there are other chain uses, then the + // transform may not be safe because order of loads/stores outside of this + // set may not be preserved. // TODO: We could allow multiple sizes by tracking each stored byte. if (Store->getMemoryVT() != MemVT || !Store->isSimple() || - Store->isIndexed()) + Store->isIndexed() || !Store->hasOneUse()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -7974,7 +8227,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; - Optional<BaseIndexOffset> Base; + std::optional<BaseIndexOffset> Base; for (auto *Store : Stores) { // All the stores store different parts of the CombinedValue. A truncate is // required to get the partial value. 
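
isBigEndian above classifies the per-byte memory offsets as a little- or big-endian layout by comparing against littleEndianByteAt/bigEndianByteAt. A small standalone model of that classification, assuming the offsets are already made relative to the first one (which the real caller does by subtracting FirstOffset):

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

static unsigned littleEndianByteAt(unsigned, unsigned i) { return i; }
static unsigned bigEndianByteAt(unsigned BW, unsigned i) { return BW - i - 1; }

// true = big endian, false = little endian, nullopt = neither.
static std::optional<bool> classify(const std::vector<int64_t> &Offsets) {
  unsigned Width = Offsets.size();
  if (Width < 2)
    return std::nullopt;
  bool Big = true, Little = true;
  for (unsigned i = 0; i < Width; ++i) {
    Little &= Offsets[i] == littleEndianByteAt(Width, i);
    Big &= Offsets[i] == bigEndianByteAt(Width, i);
    if (!Big && !Little)
      return std::nullopt;
  }
  return Big;
}

int main() {
  assert(classify({0, 1, 2, 3}) == std::optional<bool>(false)); // little endian
  assert(classify({3, 2, 1, 0}) == std::optional<bool>(true));  // big endian
  assert(!classify({0, 2, 1, 3}).has_value());                  // mismatch
  return 0;
}
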
@@ -8042,7 +8295,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // Check that a store of the wide type is both allowed and fast on the target const DataLayout &Layout = DAG.getDataLayout(); - bool Fast = false; + unsigned Fast = 0; bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT, *FirstStore->getMemOperand(), &Fast); if (!Allowed || !Fast) @@ -8146,7 +8399,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); - unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); + unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits(); + assert(LoadBitWidth % 8 == 0 && "can only analyze providers for individual bytes not bit"); unsigned LoadByteWidth = LoadBitWidth / 8; @@ -8155,11 +8409,11 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { : littleEndianByteAt(LoadByteWidth, P.ByteOffset); }; - Optional<BaseIndexOffset> Base; + std::optional<BaseIndexOffset> Base; SDValue Chain; SmallPtrSet<LoadSDNode *, 8> Loads; - Optional<ByteProvider> FirstByteProvider; + std::optional<ByteProvider> FirstByteProvider; int64_t FirstOffset = INT64_MAX; // Check if all the bytes of the OR we are looking at are loaded from the same @@ -8167,7 +8421,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { SmallVector<int64_t, 8> ByteOffsets(ByteWidth); unsigned ZeroExtendedBytes = 0; for (int i = ByteWidth - 1; i >= 0; --i) { - auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); + auto P = + calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt, + /*StartingIndex*/ i); if (!P) return SDValue(); @@ -8181,10 +8437,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(P->isMemory() && "provenance should either be memory or zero"); LoadSDNode *L = P->Load; - assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && - !L->isIndexed() && - "Must be enforced by calculateByteProvider"); - assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); // All loads must share the same chain SDValue LChain = L->getChain(); @@ -8196,8 +8448,25 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Loads must share the same base address BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); int64_t ByteOffsetFromBase = 0; + + // For vector loads, the expected load combine pattern will have an + // ExtractElement for each index in the vector. While each of these + // ExtractElements will be accessing the same base address as determined + // by the load instruction, the actual bytes they interact with will differ + // due to different ExtractElement indices. To accurately determine the + // byte position of an ExtractElement, we offset the base load ptr with + // the index multiplied by the byte size of each element in the vector. 
+ if (L->getMemoryVT().isVector()) { + unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits(); + if (LoadWidthInBit % 8 != 0) + return SDValue(); + unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8; + Ptr.addToOffset(ByteOffsetFromVector); + } + if (!Base) Base = Ptr; + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); @@ -8213,6 +8482,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { Loads.insert(L); } + assert(!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value"); assert(Base && "Base address of the accessed memory location must be set"); @@ -8236,8 +8506,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check if the bytes of the OR we are looking at match with either big or // little endian value load - Optional<bool> IsBigEndian = isBigEndian( - makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); + std::optional<bool> IsBigEndian = isBigEndian( + ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); if (!IsBigEndian) return SDValue(); @@ -8272,7 +8542,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Check that a load of the wide type is both allowed and fast on the target - bool Fast = false; + unsigned Fast = 0; bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, *FirstLoad->getMemOperand(), &Fast); @@ -8445,6 +8715,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; + // fold (a^b) -> (a|b) iff a and b share no bits. + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // look for 'add-like' folds: // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && @@ -8536,8 +8811,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (not (add X, -1)) -> (neg X) if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD && isAllOnesOrAllOnesSplat(N0.getOperand(1))) { - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - N0.getOperand(0)); + return DAG.getNegative(N0.getOperand(0), DL, VT); } // fold (xor (and x, y), y) -> (and (not x), y) @@ -8599,6 +8873,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return R; if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) return R; + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) @@ -8698,13 +8974,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); - // TODO: This is limited to early combining because it may reveal regressions - // otherwise. But since we just checked a target hook to see if this is - // desirable, that should have filtered out cases where this interferes - // with some other pattern matching. - if (!LegalTypes) - if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) - return R; + // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)). + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. 
This sort of @@ -8723,11 +8995,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { break; } - // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); - if (!BinOpCst) - return SDValue(); - // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. @@ -8745,16 +9012,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); - // Fold the constants, shifting the binop RHS by the shift amount. + // Attempt to fold the constants, shifting the binop RHS by the shift amount. SDLoc DL(N); EVT VT = N->getValueType(0); - SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), - N->getOperand(1)); - assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); + if (SDValue NewRHS = DAG.FoldConstantArithmetic( + N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) { + SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), + N->getOperand(1)); + return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); + } - SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), - N->getOperand(1)); - return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); + return SDValue(); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { @@ -8832,7 +9100,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { unsigned NextOp = N0.getOpcode(); // fold (rot* (rot* x, c2), c1) - // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize) + // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize) if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); @@ -8848,6 +9116,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { if (Norm1 && Norm2) if (SDValue CombinedShift = DAG.FoldConstantArithmetic( CombineOp, dl, ShiftVT, {Norm1, Norm2})) { + CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT, + {CombinedShift, BitsizeC}); SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC}); return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), @@ -9112,23 +9382,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return NewSHL; // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). - if (N0.getOpcode() == ISD::VSCALE) - if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - const APInt &C1 = NC1->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, C0 << C1); - } + if (N0.getOpcode() == ISD::VSCALE && N1C) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + const APInt &C1 = N1C->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, C0 << C1); + } // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). 
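
The visitRotate comment fix above adds "+ bitsize" before the final urem so that combining a ROTL with a ROTR never produces a negative intermediate amount. A standalone check of that normalization with hand-rolled 8-bit rotates (rotl8/rotr8 are made-up helpers, not LLVM code):

#include <cassert>
#include <cstdint>

static uint8_t rotl8(uint8_t x, unsigned c) {
  c %= 8;
  return uint8_t(uint8_t(x << c) | (x >> ((8 - c) % 8)));
}
static uint8_t rotr8(uint8_t x, unsigned c) { return rotl8(x, (8 - c % 8) % 8); }

int main() {
  const unsigned BW = 8;
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned c1 = 0; c1 < 16; ++c1) {
      for (unsigned c2 = 0; c2 < 16; ++c2) {
        uint8_t v = uint8_t(x);
        // rotl (rotl x, c2), c1 --> rotl x, ((c1 % bw) + (c2 % bw) + bw) % bw
        assert(rotl8(rotl8(v, c2), c1) ==
               rotl8(v, ((c1 % BW) + (c2 % BW) + BW) % BW));
        // rotl (rotr x, c2), c1 --> rotl x, ((c1 % bw) - (c2 % bw) + bw) % bw;
        // the "+ bw" keeps the combined amount non-negative before the urem.
        unsigned amt =
            unsigned(((int)(c1 % BW) - (int)(c2 % BW) + (int)BW) % (int)BW);
        assert(rotl8(rotr8(v, c2), c1) == rotl8(v, amt));
      }
    }
  }
  return 0;
}
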
APInt ShlVal; - if (N0.getOpcode() == ISD::STEP_VECTOR) - if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - if (ShlVal.ult(C0.getBitWidth())) { - APInt NewStep = C0 << ShlVal; - return DAG.getStepVector(SDLoc(N), VT, NewStep); - } + if (N0.getOpcode() == ISD::STEP_VECTOR && + ISD::isConstantSplatVector(N1.getNode(), ShlVal)) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + if (ShlVal.ult(C0.getBitWidth())) { + APInt NewStep = C0 << ShlVal; + return DAG.getStepVector(SDLoc(N), VT, NewStep); } + } return SDValue(); } @@ -9168,6 +9437,28 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, EVT NarrowVT = LeftOp.getOperand(0).getValueType(); unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + // return true if U may use the lower bits of its operands + auto UserOfLowerBits = [NarrowVTSize](SDNode *U) { + if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) { + return true; + } + ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1)); + if (!UShiftAmtSrc) { + return true; + } + unsigned UShiftAmt = UShiftAmtSrc->getZExtValue(); + return UShiftAmt < NarrowVTSize; + }; + + // If the lower part of the MUL is also used and MUL_LOHI is supported + // do not introduce the MULH in favor of MUL_LOHI + unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; + if (!ShiftOperand.hasOneUse() && + TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) && + llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) { + return SDValue(); + } + SDValue MulhRightOp; if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { unsigned ActiveBits = IsSignExt @@ -9675,16 +9966,23 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // However when after the source operand of SRL is optimized into AND, the SRL // itself may not be optimized further. Look for it and add the BRCOND into // the worklist. + // + // The also tends to happen for binary operations when SimplifyDemandedBits + // is involved. + // + // FIXME: This is unecessary if we process the DAG in topological order, + // which we plan to do. This workaround can be removed once the DAG is + // processed in topological order. if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); - if (Use->getOpcode() == ISD::BRCOND) - AddToWorklist(Use); - else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { - // Also look pass the truncate. + + // Look pass the truncate. + if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) Use = *Use->use_begin(); - if (Use->getOpcode() == ISD::BRCOND) - AddToWorklist(Use); - } + + if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND || + Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR) + AddToWorklist(Use); } // Try to transform this shift into a multiply-high if @@ -9760,7 +10058,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { uint64_t PtrOff = IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8); Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff); - bool Fast = false; + unsigned Fast = 0; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, RHS->getAddressSpace(), NewAlign, RHS->getMemOperand()->getFlags(), &Fast) && @@ -9843,8 +10141,8 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { // Given a ABS node, detect the following pattern: // (ABS (SUB (EXTEND a), (EXTEND b))). // Generates UABD/SABD instruction. 
-static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { +SDValue DAGCombiner::foldABSToABD(SDNode *N) { + EVT VT = N->getValueType(0); SDValue AbsOp1 = N->getOperand(0); SDValue Op0, Op1; @@ -9857,10 +10155,14 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, unsigned Opc0 = Op0.getOpcode(); // Check if the operands of the sub are (zero|sign)-extended. if (Opc0 != Op1.getOpcode() || - (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) + (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) { + // fold (abs (sub nsw x, y)) -> abds(x, y) + if (AbsOp1->getFlags().hasNoSignedWrap() && + TLI.isOperationLegalOrCustom(ISD::ABDS, VT)) + return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1); return SDValue(); + } - EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; @@ -9897,9 +10199,24 @@ SDValue DAGCombiner::visitABS(SDNode *N) { if (DAG.SignBitIsZero(N0)) return N0; - if (SDValue ABD = combineABSToABD(N, DAG, TLI)) + if (SDValue ABD = foldABSToABD(N)) return ABD; + // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x))) + // iff zero_extend/truncate are free. + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { + EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT(); + if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) && + TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) && + hasOperation(ISD::ABS, ExtVT)) { + SDLoc DL(N); + return DAG.getNode( + ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::ABS, DL, ExtVT, + DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0)))); + } + } + return SDValue(); } @@ -10053,14 +10370,11 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); } -/// Generate Min/Max node -static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, - SDValue RHS, SDValue True, SDValue False, - ISD::CondCode CC, const TargetLowering &TLI, - SelectionDAG &DAG) { - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) - return SDValue(); - +static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, + ISD::CondCode CC, + const TargetLowering &TLI, + SelectionDAG &DAG) { EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); switch (CC) { case ISD::SETOLT: @@ -10101,6 +10415,46 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// Generate Min/Max node +SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC) { + if ((LHS == True && RHS == False) || (LHS == False && RHS == True)) + return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG); + + // If we can't directly match this, try to see if we can pull an fneg out of + // the select. + SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression( + True, DAG, LegalOperations, ForCodeSize); + if (!NegTrue) + return SDValue(); + + HandleSDNode NegTrueHandle(NegTrue); + + // Try to unfold an fneg from the select if we are comparing the negated + // constant. + // + // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K)) + // + // TODO: Handle fabs + if (LHS == NegTrue) { + // If we can't directly match this, try to see if we can pull an fneg out of + // the select. 
+ SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression( + RHS, DAG, LegalOperations, ForCodeSize); + if (NegRHS) { + HandleSDNode NegRHSHandle(NegRHS); + if (NegRHS == False) { + SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue, + False, CC, TLI, DAG); + return DAG.getNode(ISD::FNEG, DL, VT, Combined); + } + } + } + + return SDValue(); +} + /// If a (v)select has a condition value that is a sign-bit test, try to smear /// the condition operand sign-bit across the value width and use it as a mask. static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { @@ -10138,6 +10492,25 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, + const TargetLowering &TLI) { + if (!TLI.convertSelectOfConstantsToMath(VT)) + return false; + + if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse()) + return true; + if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + return true; + + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1))) + return true; + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1))) + return true; + + return false; +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -10154,96 +10527,106 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { if (!C1 || !C2) return SDValue(); + if (CondVT != MVT::i1 || LegalOperations) { + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (CondVT.isInteger() && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == + TargetLowering::ZeroOrOneBooleanContent && + C1->isZero() && C2->isOne()) { + SDValue NotCond = + DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); + } + // Only do this before legalization to avoid conflicting with target-specific // transforms in the other direction (create a select from a zext/sext). There // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. 
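
The combineMinNumMaxNum change just above matches "select (setcc x, K) (fneg x), -K" and rebuilds it as an fneg of a min/max. The floating-point identity it relies on (for non-NaN inputs) is that negating both select arms swaps min and max; a minimal sketch using std::fmin/std::fmax as stand-ins for minnum/maxnum:

#include <cassert>
#include <cmath>

int main() {
  const double xs[] = {-3.5, -0.0, 0.0, 1.25, 7.0, 1e300};
  const double ks[] = {-2.0, 0.0, 4.5, 1e300};
  for (double x : xs) {
    for (double k : ks) {
      // select (x < k), -x, -k  ==  fneg (minnum x, k)
      double selected = (x < k) ? -x : -k;
      assert(selected == -std::fmin(x, k));
      // and the mirrored form with maxnum:
      double selected2 = (x > k) ? -x : -k;
      assert(selected2 == -std::fmax(x, k));
    }
  }
  return 0;
}
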
- if (CondVT == MVT::i1 && !LegalOperations) { - if (C1->isZero() && C2->isOne()) { - // select Cond, 0, 1 --> zext (!Cond) - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - if (VT != MVT::i1) - NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); - return NotCond; - } - if (C1->isZero() && C2->isAllOnes()) { - // select Cond, 0, -1 --> sext (!Cond) - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - if (VT != MVT::i1) - NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); - return NotCond; - } - if (C1->isOne() && C2->isZero()) { - // select Cond, 1, 0 --> zext (Cond) - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - return Cond; - } - if (C1->isAllOnes() && C2->isZero()) { - // select Cond, -1, 0 --> sext (Cond) - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); - return Cond; - } - - // Use a target hook because some targets may prefer to transform in the - // other direction. - if (TLI.convertSelectOfConstantsToMath(VT)) { - // For any constants that differ by 1, we can transform the select into an - // extend and add. - const APInt &C1Val = C1->getAPIntValue(); - const APInt &C2Val = C2->getAPIntValue(); - if (C1Val - 1 == C2Val) { - // select Cond, C1, C1-1 --> add (zext Cond), C1-1 - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } - if (C1Val + 1 == C2Val) { - // select Cond, C1, C1+1 --> add (sext Cond), C1+1 - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + assert(CondVT == MVT::i1 && !LegalOperations); - // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isZero()) { - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - SDValue ShAmtC = - DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); - return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); - } + // select Cond, 1, 0 --> zext (Cond) + if (C1->isOne() && C2->isZero()) + return DAG.getZExtOrTrunc(Cond, DL, VT); - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) - return V; - } + // select Cond, -1, 0 --> sext (Cond) + if (C1->isAllOnes() && C2->isZero()) + return DAG.getSExtOrTrunc(Cond, DL, VT); + + // select Cond, 0, 1 --> zext (!Cond) + if (C1->isZero() && C2->isOne()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT); + return NotCond; + } + + // select Cond, 0, -1 --> sext (!Cond) + if (C1->isZero() && C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return NotCond; + } + // Use a target hook because some targets may prefer to transform in the + // other direction. + if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) return SDValue(); + + // For any constants that differ by 1, we can transform the select into + // an extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (C1Val - 1 == C2Val) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - // fold (select Cond, 0, 1) -> (xor Cond, 1) - // We can't do this reliably if integer based booleans have different contents - // to floating point based booleans. 
This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. - if (CondVT.isInteger() && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == - TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == - TargetLowering::ZeroOrOneBooleanContent && - C1->isZero() && C2->isOne()) { - SDValue NotCond = - DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); - if (VT.bitsEq(CondVT)) - return NotCond; - return DAG.getZExtOrTrunc(NotCond, DL, VT); + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (C1Val + 1 == C2Val) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isZero()) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); } + // select Cond, -1, C --> or (sext Cond), C + if (C1->isAllOnes()) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Cond, N2); + } + + // select Cond, C, -1 --> or (sext (not Cond)), C + if (C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + return SDValue(); } @@ -10351,10 +10734,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; - if (SDValue V = foldSelectOfConstants(N)) + if (SDValue V = foldBoolSelectToLogic(N, DAG)) return V; - if (SDValue V = foldBoolSelectToLogic(N, DAG)) + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); + SelectOp->setFlags(Flags); + return SelectOp; + } + + if (SDValue V = foldSelectOfConstants(N)) return V; // If we can fold this based on the true/false value, do so. @@ -10439,13 +10829,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } - // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { - SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); - SelectOp->setFlags(Flags); - return SelectOp; - } - // Fold selects based on a setcc into other things, such as min/max/abs. if (N0.getOpcode() == ISD::SETCC) { SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1); @@ -10456,8 +10839,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // // This is OK if we don't care what happens if either operand is a NaN. if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) - if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, - CC, TLI, DAG)) + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC)) return FMinMax; // Use 'unsigned add with overflow' to optimize an unsigned saturating add. 
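
The foldSelectOfConstants rewrite above turns boolean selects of constants into zext/sext/add/or/shl. The i1-to-iN identities it depends on (sign-extending a true i1 gives all-ones) can be checked directly; this is just the arithmetic, not the DAG code:

#include <cassert>
#include <cstdint>

int main() {
  for (int b = 0; b <= 1; ++b) {
    bool Cond = (b != 0);
    uint32_t Zext = Cond ? 1u : 0u;  // zext i1
    uint32_t Sext = Cond ? ~0u : 0u; // sext i1 (all ones when true)
    uint32_t C1 = 100, C = 0x1234u;

    // select Cond, C1, C1-1 --> add (zext Cond), C1-1
    assert((Cond ? C1 : C1 - 1) == Zext + (C1 - 1));
    // select Cond, C1, C1+1 --> add (sext Cond), C1+1 (unsigned wrap == two's complement add)
    assert((Cond ? C1 : C1 + 1) == uint32_t(Sext + (C1 + 1)));
    // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
    assert((Cond ? 16u : 0u) == (Zext << 4));
    // select Cond, -1, C --> or (sext Cond), C
    assert((Cond ? ~0u : C) == (Sext | C));
    // select Cond, C, -1 --> or (sext (not Cond)), C
    assert((Cond ? C : ~0u) == ((Cond ? 0u : ~0u) | C));
  }
  return 0;
}
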
@@ -10568,23 +10951,37 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, - SelectionDAG &DAG) { - if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD) + SelectionDAG &DAG, const SDLoc &DL) { + if (Index.getOpcode() != ISD::ADD) return false; // Only perform the transformation when existing operands can be reused. if (IndexIsScaled) return false; - // For now we check only the LHS of the add. - SDValue LHS = Index.getOperand(0); - SDValue SplatVal = DAG.getSplatValue(LHS); - if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType()) + if (!isNullConstant(BasePtr) && !Index.hasOneUse()) return false; - BasePtr = SplatVal; - Index = Index.getOperand(1); - return true; + EVT VT = BasePtr.getValueType(); + if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0)); + SplatVal && SplatVal.getValueType() == VT) { + if (isNullConstant(BasePtr)) + BasePtr = SplatVal; + else + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + Index = Index.getOperand(1); + return true; + } + if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1)); + SplatVal && SplatVal.getValueType() == VT) { + if (isNullConstant(BasePtr)) + BasePtr = SplatVal; + else + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + Index = Index.getOperand(0); + return true; + } + return false; } // Fold sext/zext of index into index type. @@ -10619,6 +11016,37 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, return false; } +SDValue DAGCombiner::visitVPSCATTER(SDNode *N) { + VPScatterSDNode *MSC = cast<VPScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Chain = MSC->getChain(); + SDValue Index = MSC->getIndex(); + SDValue Scale = MSC->getScale(); + SDValue StoreVal = MSC->getValue(); + SDValue BasePtr = MSC->getBasePtr(); + SDValue VL = MSC->getVectorLength(); + ISD::MemIndexType IndexType = MSC->getIndexType(); + SDLoc DL(N); + + // Zap scatters with a zero mask. + if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) + return Chain; + + if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) { + SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL}; + return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType); + } + + if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) { + SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL}; + return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType); + } + + return SDValue(); +} + SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue Mask = MSC->getMask(); @@ -10634,7 +11062,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; - if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) { + if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), IndexType, @@ -10700,8 +11128,9 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // If this is a TRUNC followed by a masked store, fold this into a masked // truncating store. We can do this even if this is already a masked // truncstore. 
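
The refineUniformBase rewrite above hoists a splat operand of the index ADD into the gather/scatter base pointer even when the base is non-zero. The per-lane address math it must preserve is just associativity of the add; a deliberately trivial integer model, assuming an unscaled index (IndexIsScaled false, scale 1) and made-up variable names:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t BasePtr = 0x1000;
  uint64_t Splat = 64;               // uniform part of the index vector
  int64_t Index[4] = {0, 8, 16, 24}; // varying part of the index vector
  for (int i = 0; i < 4; ++i) {
    // Before: address = BasePtr + (Splat + Index[i])
    uint64_t Before = BasePtr + uint64_t(int64_t(Splat) + Index[i]);
    // After:  address = (BasePtr + Splat) + Index[i]
    uint64_t After = (BasePtr + Splat) + uint64_t(Index[i]);
    assert(Before == After);
  }
  return 0;
}
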
+ // TODO: Try combine to masked compress store if possiable. if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() && - MST->isUnindexed() && + MST->isUnindexed() && !MST->isCompressingStore() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), MST->getMemoryVT(), LegalOperations)) { auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(), @@ -10715,6 +11144,34 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVPGATHER(SDNode *N) { + VPGatherSDNode *MGT = cast<VPGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDValue Chain = MGT->getChain(); + SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); + SDValue BasePtr = MGT->getBasePtr(); + SDValue VL = MGT->getVectorLength(); + ISD::MemIndexType IndexType = MGT->getIndexType(); + SDLoc DL(N); + + if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) { + SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL}; + return DAG.getGatherVP( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType); + } + + if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) { + SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL}; + return DAG.getGatherVP( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType); + } + + return SDValue(); +} + SDValue DAGCombiner::visitMGATHER(SDNode *N) { MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); SDValue Mask = MGT->getMask(); @@ -10730,7 +11187,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return CombineTo(N, PassThru, MGT->getChain()); - if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) { + if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather( DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, @@ -10782,7 +11239,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || - !TLI.convertSelectOfConstantsToMath(VT) || + !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) || !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) return SDValue(); @@ -10895,8 +11352,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // NaN. 
// if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { - if (SDValue FMinMax = - combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) + if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC)) return FMinMax; } @@ -11037,8 +11493,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD && ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT, /*AllowUndefs*/ true)) { - OpRHS = DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), OpRHS); + OpRHS = DAG.getNegative(OpRHS, DL, VT); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } @@ -11109,6 +11564,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { if (N2 == N3) return N2; + // select_cc bool, 0, x, y, seteq -> select bool, y, x + if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 && + isNullConstant(N1)) + return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2); + // Determine if the condition we're dealing with is constant if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false)) { @@ -11323,9 +11783,11 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SDLoc DL(N); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || - Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) - && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || + Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -11373,15 +11835,13 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SmallVector<SDValue, 8> Elts; unsigned NumElts = VT.getVectorNumElements(); - // For zero-extensions, UNDEF elements still guarantee to have the upper - // bits set to zero. - bool IsZext = - Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG; - for (unsigned i = 0; i != NumElts; ++i) { SDValue Op = N0.getOperand(i); if (Op.isUndef()) { - Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT)); + if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG) + Elts.push_back(DAG.getUNDEF(SVT)); + else + Elts.push_back(DAG.getConstant(0, DL, SVT)); continue; } @@ -11952,7 +12412,7 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { + if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. @@ -11973,6 +12433,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVCastOp(N, DL)) + return FoldedVOp; + // sext(undef) = 0 because the top bit will all be the same. 
if (N0.isUndef()) return DAG.getConstant(0, DL, VT); @@ -11985,6 +12449,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); + // fold (sext (sext_inreg x)) -> (sext (trunc x)) + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { + SDValue N00 = N0.getOperand(0); + EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT(); + if (N00.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isTypeLegal(ExtVT))) { + SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T); + } + } + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) @@ -12121,7 +12595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND && TLI.isOperationLegalOrCustom(ISD::SUB, VT)) { SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT); - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext); + return DAG.getNegative(Zext, DL, VT); } // Eliminate this sign extend by doing a decrement in the destination type: // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1) @@ -12218,10 +12692,41 @@ static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); } +// If we have (zext (abs X)) where X is a type that will be promoted by type +// legalization, convert to (abs (sext X)). But don't extend past a legal type. +static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) { + assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend."); + + EVT VT = Extend->getValueType(0); + if (VT.isVector()) + return SDValue(); + + SDValue Abs = Extend->getOperand(0); + if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse()) + return SDValue(); + + EVT AbsVT = Abs.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.getTypeAction(*DAG.getContext(), AbsVT) != + TargetLowering::TypePromoteInteger) + return SDValue(); + + EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT); + + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0)); + SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt); + return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT); +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N))) + return FoldedVOp; + // zext(undef) = 0 if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); @@ -12478,6 +12983,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; + if (SDValue V = widenAbs(N, DAG)) + return V; + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) return Res; @@ -12904,8 +13412,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { auto AdjustBigEndianShift = [&](unsigned ShAmt) { unsigned LVTStoreBits = - LN0->getMemoryVT().getStoreSizeInBits().getFixedSize(); - unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize(); + LN0->getMemoryVT().getStoreSizeInBits().getFixedValue(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue(); return LVTStoreBits - EVTStoreBits - ShAmt; }; @@ -13146,16 +13654,75 @@ SDValue 
DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } + // Fold (iM_signext_inreg + // (extract_subvector (zext|anyext|sext iN_v to _) _) + // from iN) + // -> (extract_subvector (signext iN_v to iM)) + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && + ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { + SDValue InnerExt = N0.getOperand(0); + EVT InnerExtVT = InnerExt->getValueType(0); + SDValue Extendee = InnerExt->getOperand(0); + + if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) { + SDValue SignExtExtendee = + DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee, + N0.getOperand(1)); + } + } + return SDValue(); } +static SDValue +foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, + bool LegalOperations) { + unsigned InregOpcode = N->getOpcode(); + unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode); + + SDValue Src = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT SrcVT = EVT::getVectorVT(*DAG.getContext(), + Src.getValueType().getVectorElementType(), + VT.getVectorElementCount()); + + assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG || + InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG || + InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) && + "Expected EXTEND_VECTOR_INREG dag node in input!"); + + // Profitability check: our operand must be an one-use CONCAT_VECTORS. + // FIXME: one-use check may be overly restrictive + if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + + // Profitability check: we must be extending exactly one of it's operands. + // FIXME: this is probably overly restrictive. + Src = Src.getOperand(0); + if (Src.getValueType() != SrcVT) + return SDValue(); + + if (LegalOperations && !TLI.isOperationLegal(Opcode, VT)) + return SDValue(); + + return DAG.getNode(Opcode, SDLoc(N), VT, Src); +} + SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. - if (N0.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + if (N0.isUndef()) { + // aext_vector_inreg(undef) = undef because the top bits are undefined. + // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. + return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG + ? DAG.getUNDEF(VT) + : DAG.getConstant(0, SDLoc(N), VT); + } if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -13163,6 +13730,10 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); + if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG, + LegalOperations)) + return R; + return SDValue(); } @@ -13420,18 +13991,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // See if we can simplify the input to this truncate through knowledge that - // only the low bits are being used. - // For example "trunc (or (shl x, 8), y)" // -> trunc y - // Currently we only perform this optimization on scalars because vectors - // may have different active low bits. 
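
The widenAbs helper added in the zero-extend hunk a bit earlier rewrites zext(abs X) as abs(sext X) when X's type will be promoted. The two agree for every i8 value, including the wrapping abs of -128; a standalone check (the wrapping-abs computation is written out by hand, it is not an LLVM API):

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  for (int v = -128; v <= 127; ++v) {
    int8_t x = int8_t(v);
    // abs at i8 width with two's complement wrap, so abs(-128) == -128 (0x80).
    uint8_t AbsI8 = uint8_t(x < 0 ? uint8_t(0u - uint8_t(x)) : uint8_t(x));
    uint32_t ZextAbs = AbsI8;                     // zext (abs x) to i32
    int32_t SextX = x;                            // sext x to i32
    uint32_t AbsSext = uint32_t(std::abs(SextX)); // abs (sext x) at i32
    assert(ZextAbs == AbsSext);
  }
  return 0;
}
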
- if (!VT.isVector()) { - APInt Mask = - APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); - if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); - } - // fold (truncate (extract_subvector(ext x))) -> // (extract_subvector x) // TODO: This can be generalized to cover cases where the truncate and extract @@ -13536,7 +14095,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); - bool LD1Fast = false; + unsigned LD1Fast = 0; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && @@ -13866,15 +14425,72 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; - // Fold freeze(bitcast(x)) -> bitcast(freeze(x)). - // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold. - if (N0.getOpcode() == ISD::BITCAST) - return DAG.getBitcast(N->getValueType(0), - DAG.getNode(ISD::FREEZE, SDLoc(N0), - N0.getOperand(0).getValueType(), - N0.getOperand(0))); + // Fold freeze(op(x, ...)) -> op(freeze(x), ...). + // Try to push freeze through instructions that propagate but don't produce + // poison as far as possible. If an operand of freeze follows three + // conditions 1) one-use, 2) does not produce poison, and 3) has all but one + // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push + // the freeze through to the operands that are not guaranteed non-poison. + // NOTE: we will strip poison-generating flags, so ignore them here. + if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false, + /*ConsiderFlags*/ false) || + N0->getNumValues() != 1 || !N0->hasOneUse()) + return SDValue(); - return SDValue(); + bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR; + + SmallSetVector<SDValue, 8> MaybePoisonOperands; + for (SDValue Op : N0->ops()) { + if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false, + /*Depth*/ 1)) + continue; + bool HadMaybePoisonOperands = !MaybePoisonOperands.empty(); + bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op); + if (!HadMaybePoisonOperands) + continue; + if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) { + // Multiple maybe-poison ops when not allowed - bail out. + return SDValue(); + } + } + // NOTE: the whole op may be not guaranteed to not be undef or poison because + // it could create undef or poison due to it's poison-generating flags. + // So not finding any maybe-poison operands is fine. + + for (SDValue MaybePoisonOperand : MaybePoisonOperands) { + // Don't replace every single UNDEF everywhere with frozen UNDEF, though. + if (MaybePoisonOperand.getOpcode() == ISD::UNDEF) + continue; + // First, freeze each offending operand. + SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand); + // Then, change all other uses of unfrozen operand to use frozen operand. + DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand); + if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE && + FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) { + // But, that also updated the use in the freeze we just created, thus + // creating a cycle in a DAG. Let's undo that by mutating the freeze. 
+ DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(), + MaybePoisonOperand); + } + } + + // The whole node may have been updated, so the value we were holding + // may no longer be valid. Re-fetch the operand we're `freeze`ing. + N0 = N->getOperand(0); + + // Finally, recreate the node, it's operands were updated to use + // frozen operands, so we just need to use it's "original" operands. + SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end()); + // Special-handle ISD::UNDEF, each single one of them can be it's own thing. + for (SDValue &Op : Ops) { + if (Op.getOpcode() == ISD::UNDEF) + Op = DAG.getFreeze(Op); + } + // NOTE: this strips poison generating flags. + SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops); + assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) && + "Can't create node that may be undef/poison!"); + return R; } /// We know that BV is a build_vector node with Constant, ConstantFP or Undef @@ -14038,26 +14654,37 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) + // This also works with nested fma instructions: + // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G --> + // fma A, B, (fma C, D, fma (E, F, G)) + // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) --> + // fma A, B, (fma C, D, fma (E, F, G)). // This requires reassociation because it changes the order of operations. - SDValue FMA, E; - if (CanReassociate && isFusedOp(N0) && - N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && - N0.getOperand(2).hasOneUse()) { - FMA = N0; - E = N1; - } else if (CanReassociate && isFusedOp(N1) && - N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && - N1.getOperand(2).hasOneUse()) { - FMA = N1; - E = N0; - } - if (FMA && E) { - SDValue A = FMA.getOperand(0); - SDValue B = FMA.getOperand(1); - SDValue C = FMA.getOperand(2).getOperand(0); - SDValue D = FMA.getOperand(2).getOperand(1); - SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E); - return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE); + if (CanReassociate) { + SDValue FMA, E; + if (isFusedOp(N0) && N0.hasOneUse()) { + FMA = N0; + E = N1; + } else if (isFusedOp(N1) && N1.hasOneUse()) { + FMA = N1; + E = N0; + } + + SDValue TmpFMA = FMA; + while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) { + SDValue FMul = TmpFMA->getOperand(2); + if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) { + SDValue C = FMul.getOperand(0); + SDValue D = FMul.getOperand(1); + SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E); + DAG.ReplaceAllUsesOfValueWith(FMul, CDE); + // Replacing the inner FMul could cause the outer FMA to be simplified + // away. + return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA; + } + + TmpFMA = TmpFMA->getOperand(2); + } } // Look through FP_EXTEND nodes to do more combining. 
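
The visitFADDForFMACombine change above generalizes fadd(fma(A,B,fmul(C,D)), E) -> fma(A,B,fma(C,D,E)) to nested FMA chains. This is a reassociation (it moves where rounding happens), which is why the hunk keeps it behind CanReassociate; with exactly representable values both association orders agree, which makes for an easy sketch using std::fma:

#include <cassert>
#include <cmath>

int main() {
  // Small integers stored in doubles, so every product and sum is exact.
  double A = 2.0, B = 3.0, C = 4.0, D = 5.0, E = 6.0, F = 7.0, G = 8.0;

  // fadd (fma A, B, (fmul C, D)), E  -->  fma A, B, (fma C, D, E)
  double Before = std::fma(A, B, C * D) + E;
  double After = std::fma(A, B, std::fma(C, D, E));
  assert(Before == After); // both 2*3 + 4*5 + 6 == 32

  // Nested form from the comment in the hunk:
  // fadd (fma A, B, (fma C, D, (fmul E, F))), G --> fma A, B, (fma C, D, (fma E, F, G))
  double BeforeNested = std::fma(A, B, std::fma(C, D, E * F)) + G;
  double AfterNested = std::fma(A, B, std::fma(C, D, std::fma(E, F, G)));
  assert(BeforeNested == AfterNested); // both 2*3 + 4*5 + 6*7 + 8 == 76
  return 0;
}
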
@@ -14357,8 +14984,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); }; - auto isContractableAndReassociableFMUL = [isContractableFMUL, - isReassociable](SDValue N) { + auto isContractableAndReassociableFMUL = [&isContractableFMUL, + &isReassociable](SDValue N) { return isContractableFMUL(N) && isReassociable(N.getNode()); }; @@ -14593,8 +15220,8 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); + SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); + SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -14691,8 +15318,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); - bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + SDNode *CFP00 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + SDNode *CFP01 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { @@ -14712,8 +15341,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N1.getOpcode() == ISD::FMUL) { - bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); - bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + SDNode *CFP10 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + SDNode *CFP11 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { @@ -14733,7 +15364,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N0.getOpcode() == ISD::FADD) { - bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + SDNode *CFP00 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { @@ -14743,7 +15375,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N1.getOpcode() == ISD::FADD) { - bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + SDNode *CFP10 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { @@ -14956,12 +15589,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); - if (NegN0 && NegN1 && - (CostN0 == TargetLowering::NegatibleCost::Cheaper || - CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1); + if (NegN0) { + HandleSDNode NegN0Handle(NegN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == 
TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1); + } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) @@ -14990,7 +15625,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { case ISD::SETLT: case ISD::SETLE: std::swap(TrueOpnd, FalseOpnd); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOGT: case ISD::SETUGT: case ISD::SETOGE: @@ -15047,12 +15682,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); - if (NegN0 && NegN1 && - (CostN0 == TargetLowering::NegatibleCost::Cheaper || - CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); + if (NegN0) { + HandleSDNode NegN0Handle(NegN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); + } // FIXME: use fast math flags instead of Options.UnsafeFPMath if (Options.UnsafeFPMath) { @@ -15350,12 +15987,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); - if (NegN0 && NegN1 && - (CostN0 == TargetLowering::NegatibleCost::Cheaper || - CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); + if (NegN0) { + HandleSDNode NegN0Handle(NegN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); + } return SDValue(); } @@ -15422,11 +16061,7 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { if (N1Op0VT == MVT::f128) return false; - // Avoid mismatched vector operand types, for better instruction selection. - if (N1Op0VT.isVector()) - return false; - - return true; + return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound; } return false; } @@ -15748,12 +16383,12 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp - if (N0CFP) - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1})) + return C; // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) @@ -15781,8 +16416,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // Also, this is a value preserving truncation iff both fp_round's are. 
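The FMUL, FMA and FDIV hunks above all apply the same negation-pair fold, but now compute NegN1 only after anchoring NegN0 in a HandleSDNode, since building the second negated expression may otherwise delete the first. The underlying scalar identity, which is only worth applying when at least one of the negations is cheaper than the original operand, can be checked directly:

#include <cassert>

int main() {
  double x = -3.5, y = 2.0;
  // (-X) * (-Y) == X * Y and (-X) / (-Y) == X / Y are exact in IEEE 754;
  // the combine fires only when the rewrite removes an explicit fneg
  // (NegatibleCost::Cheaper on at least one side).
  assert((-x) * (-y) == x * y);
  assert((-x) / (-y) == x / y);
}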
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { SDLoc DL(N); - return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), - DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + return DAG.getNode( + ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true)); } } @@ -15805,6 +16441,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N))) + return FoldedVOp; + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) @@ -15840,11 +16480,11 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, - DAG.getIntPtrConstant(1, SDLoc(N0))), - ExtLoad.getValue(1)); + CombineTo( + N0.getNode(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)), + ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -16599,7 +17239,6 @@ static inline ElementCount numVectorEltsOrZero(EVT T) { } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { - Val = ST->getValue(); EVT STType = Val.getValueType(); EVT STMemType = ST->getMemoryVT(); if (STType == STMemType) @@ -16655,7 +17294,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); // TODO: Relax this restriction for unordered atomics (see D66309) - if (!ST || !ST->isSimple()) + if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -16691,9 +17330,10 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // significant bit in the loaded value maps to the least significant bit in // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. 
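The offset bookkeeping in the store-to-load forwarding code that follows is easy to get wrong, so here is a standalone sketch of the arithmetic (helper names are illustrative, not the DAG code): for a store of STBits and a load of LDBits at byte Offset into the stored value, big-endian targets flip the offset, and forwarding is legal only when the load is fully covered.

#include <cassert>
#include <cstdint>

// Byte offset of the loaded bytes inside the stored value, counted from
// the least significant byte, as used by the coverage check.
int64_t forwardOffset(int64_t Offset, unsigned STBits, unsigned LDBits,
                      bool BigEndian) {
  if (BigEndian)
    Offset = ((int64_t)STBits - (int64_t)LDBits) / 8 - Offset;
  return Offset;
}

bool storeCoversLoad(int64_t Offset, unsigned STBits, unsigned LDBits) {
  return Offset >= 0 && Offset * 8 + LDBits <= STBits;
}

int main() {
  // i16 load of the low half of an i64 store.
  assert(storeCoversLoad(forwardOffset(0, 64, 16, /*BigEndian=*/false), 64, 16));
  // On a big-endian target those bytes sit 6 bytes into the stored value;
  // the new code handles that non-zero case with an SRL of the stored
  // value instead of bailing out.
  assert(forwardOffset(0, 64, 16, /*BigEndian=*/true) == 6);
}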
+ int64_t OrigOffset = Offset; if (DAG.getDataLayout().isBigEndian()) - Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - - (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / + Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() - + (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) / 8 - Offset; @@ -16705,8 +17345,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { if (LdStScalable) STCoversLD = (Offset == 0) && LdMemSize == StMemSize; else - STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <= - StMemSize.getFixedSize()); + STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <= + StMemSize.getFixedValue()); auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { @@ -16735,18 +17375,30 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // Mask to size of LDMemType auto Mask = DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(), - StMemSize.getFixedSize()), + StMemSize.getFixedValue()), SDLoc(ST), STType); auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); return ReplaceLd(LD, Val, Chain); } } + // Handle some cases for big-endian that would be Offset 0 and handled for + // little-endian. + SDValue Val = ST->getValue(); + if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) { + if (STType.isInteger() && !STType.isVector() && LDType.isInteger() && + !LDType.isVector() && isTypeLegal(STType) && + TLI.isOperationLegal(ISD::SRL, STType)) { + Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val, + DAG.getConstant(Offset * 8, SDLoc(LD), STType)); + Offset = 0; + } + } + // TODO: Deal with nonzero offset. if (LD->getBasePtr().isUndef() || Offset != 0) return SDValue(); // Model necessary truncations / extenstions. - SDValue Val; // Truncate Value To Stored Memory Size. do { if (!getTruncatedStoreValue(ST, Val)) @@ -17186,7 +17838,7 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment / fast memory access constraint. - bool IsFast = false; + unsigned IsFast = 0; if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, Origin->getAddressSpace(), getAlign(), Origin->getMemOperand()->getFlags(), &IsFast) || @@ -17689,7 +18341,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; - bool IsFast = false; + unsigned IsFast = 0; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, LD->getAddressSpace(), NewAlign, @@ -17748,8 +18400,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (VTSize.isScalable()) return SDValue(); - bool FastLD = false, FastST = false; - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); + unsigned FastLD = 0, FastST = 0; + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || @@ -17892,7 +18544,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; - Optional<MachineMemOperand::Flags> Flags; + std::optional<MachineMemOperand::Flags> Flags; AAMDNodes AAInfo; for (unsigned I = 0; I != NumStores; ++I) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); @@ -17967,6 +18619,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // We may need to add a bitcast here to get types to line up. if (MemVTScalarTy != Val.getValueType().getScalarType()) { Val = DAG.getBitcast(MemVT, Val); + } else if (MemVT.isVector() && + Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val); } else { unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT; @@ -18357,7 +19012,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( // Find a legal type for the constant store. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + unsigned IsFast = 0; // Break early when size is too large to be legal. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) @@ -18467,7 +19122,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts( // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast = false; + unsigned IsFast = 0; // Break early when size is too large to be legal. if (Ty.getSizeInBits() > MaximumLegalStoreInBits) @@ -18620,8 +19275,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) break; - bool IsFastSt = false; - bool IsFastLd = false; + unsigned IsFastSt = 0; + unsigned IsFastLd = 0; // Don't try vector types if we need a rotate. We may still fail the // legality checks for the integer type, but we can't handle the rotate // case with vectors. @@ -19076,16 +19731,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits()); - // See if we can simplify the input to this truncstore with knowledge that - // only the low bits are being used. For example: - // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + // See if we can simplify the operation with SimplifyDemandedBits, which + // only works if the value has a single use. AddToWorklist(Value.getNode()); - if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) - return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), - ST->getMemOperand()); - - // Otherwise, see if we can simplify the operation with - // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, TruncDemandedBits)) { // Re-visit the store if anything changed and the store hasn't been merged // with another node (N is deleted) SimplifyDemandedBits will add Value's @@ -19095,6 +19743,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } + + // Otherwise, see if we can simplify the input to this truncstore with + // knowledge that only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + if (SDValue Shorter = + TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG)) + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), + ST->getMemOperand()); + + // If we're storing a truncated constant, see if we can simplify it. + // TODO: Move this to targetShrinkDemandedConstant? 
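The reordered visitSTORE code above now runs SimplifyDemandedBits first, then the multiple-use variant, and additionally shrinks a stored constant to the demanded bits. A scalar sketch of the two truncstore facts being used (standalone, not DAG code):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xABCD1234, y = 0x00000077;

  // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8":
  // only the low 8 bits reach memory, and (x << 8) contributes nothing
  // to them.
  assert((uint8_t)((x << 8) | y) == (uint8_t)y);

  // Storing a truncated constant: masking it to the demanded low bits
  // first does not change the bytes that get written.
  uint32_t C = 0xDEADBEEF;
  uint32_t Demanded = 0xFF; // i8 truncstore
  assert((uint8_t)C == (uint8_t)(C & Demanded));
}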
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Value)) + if (!Cst->isOpaque()) { + const APInt &CValue = Cst->getAPIntValue(); + APInt NewVal = CValue & TruncDemandedBits; + if (NewVal != CValue) { + SDValue Shorter = + DAG.getConstant(NewVal, SDLoc(N), Value.getValueType()); + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); + } + } } // If this is a load followed by a store to the same location, then the store @@ -19235,7 +19905,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { // If we store purely within object bounds just before its lifetime ends, // we can remove the store. if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, - StoreSize.getFixedSize() * 8)) { + StoreSize.getFixedValue() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); @@ -19355,94 +20025,113 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return St1; } -/// Convert a disguised subvector insertion into a shuffle: -SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { - assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && - "Expected extract_vector_elt"); - SDValue InsertVal = N->getOperand(1); - SDValue Vec = N->getOperand(0); +// Merge an insertion into an existing shuffle: +// (insert_vector_elt (vector_shuffle X, Y, Mask), +// .(extract_vector_elt X, N), InsIndex) +// --> (vector_shuffle X, Y, NewMask) +// and variations where shuffle operands may be CONCAT_VECTORS. +static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask, + SmallVectorImpl<int> &NewMask, SDValue Elt, + unsigned InsIndex) { + if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Elt.getOperand(1))) + return false; - // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), - // InsIndex) - // --> (vector_shuffle X, Y) and variations where shuffle operands may be - // CONCAT_VECTORS. - if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && - InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa<ConstantSDNode>(InsertVal.getOperand(1))) { - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); - ArrayRef<int> Mask = SVN->getMask(); + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + SDValue InsertVal0 = Elt.getOperand(0); + int ElementOffset = -1; + + // We explore the inputs of the shuffle in order to see if we find the + // source of the extract_vector_elt. If so, we can use it to modify the + // shuffle rather than perform an insert_vector_elt. + SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; + ArgWorkList.emplace_back(Mask.size(), Y); + ArgWorkList.emplace_back(0, X); + + while (!ArgWorkList.empty()) { + int ArgOffset; + SDValue ArgVal; + std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); + + if (ArgVal == InsertVal0) { + ElementOffset = ArgOffset; + break; + } - SDValue X = Vec.getOperand(0); - SDValue Y = Vec.getOperand(1); - - // Vec's operand 0 is using indices from 0 to N-1 and - // operand 1 from N to 2N - 1, where N is the number of - // elements in the vectors. - SDValue InsertVal0 = InsertVal.getOperand(0); - int ElementOffset = -1; - - // We explore the inputs of the shuffle in order to see if we find the - // source of the extract_vector_elt. If so, we can use it to modify the - // shuffle rather than perform an insert_vector_elt. 
- SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; - ArgWorkList.emplace_back(Mask.size(), Y); - ArgWorkList.emplace_back(0, X); - - while (!ArgWorkList.empty()) { - int ArgOffset; - SDValue ArgVal; - std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); - - if (ArgVal == InsertVal0) { - ElementOffset = ArgOffset; - break; + // Peek through concat_vector. + if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { + int CurrentArgOffset = + ArgOffset + ArgVal.getValueType().getVectorNumElements(); + int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); + for (SDValue Op : reverse(ArgVal->ops())) { + CurrentArgOffset -= Step; + ArgWorkList.emplace_back(CurrentArgOffset, Op); } - // Peek through concat_vector. - if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { - int CurrentArgOffset = - ArgOffset + ArgVal.getValueType().getVectorNumElements(); - int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); - for (SDValue Op : reverse(ArgVal->ops())) { - CurrentArgOffset -= Step; - ArgWorkList.emplace_back(CurrentArgOffset, Op); - } - - // Make sure we went through all the elements and did not screw up index - // computation. - assert(CurrentArgOffset == ArgOffset); - } + // Make sure we went through all the elements and did not screw up index + // computation. + assert(CurrentArgOffset == ArgOffset); } + } - // If we failed to find a match, see if we can replace an UNDEF shuffle - // operand. - if (ElementOffset == -1 && Y.isUndef() && - InsertVal0.getValueType() == Y.getValueType()) { - ElementOffset = Mask.size(); - Y = InsertVal0; - } + // If we failed to find a match, see if we can replace an UNDEF shuffle + // operand. + if (ElementOffset == -1) { + if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType()) + return false; + ElementOffset = Mask.size(); + Y = InsertVal0; + } - if (ElementOffset != -1) { - SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + NewMask.assign(Mask.begin(), Mask.end()); + NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1); + assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 && + "NewMask[InsIndex] is out of bound"); + return true; +} - auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); - NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); - assert(NewMask[InsIndex] < - (int)(2 * Vec.getValueType().getVectorNumElements()) && - NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); +// Merge an insertion into an existing shuffle: +// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), +// InsIndex) +// --> (vector_shuffle X, Y) and variations where shuffle operands may be +// CONCAT_VECTORS. 
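The mergeEltWithShuffle helper factored out above rewrites insert_vector_elt(shuffle(X, Y, Mask), extract_vector_elt(X, N), InsIndex) purely as a mask update: the inserted lane becomes ElementOffset + N, where ElementOffset is 0 when the extract source is X and Mask.size() when it is Y (concat_vectors operands contribute intermediate offsets). A standalone check that the mask update matches element-wise evaluation (illustrative helpers):

#include <array>
#include <cassert>
#include <vector>

using Vec = std::array<int, 4>;

Vec shuffle(const Vec &X, const Vec &Y, const std::vector<int> &Mask) {
  Vec R{};
  for (size_t i = 0; i < Mask.size(); ++i)
    R[i] = Mask[i] < 4 ? X[Mask[i]] : Y[Mask[i] - 4];
  return R;
}

int main() {
  Vec X{10, 11, 12, 13}, Y{20, 21, 22, 23};
  std::vector<int> Mask{0, 5, 2, 7};

  // insert_vector_elt(shuffle(X, Y, Mask), extract_vector_elt(X, 3), 1)
  Vec Inserted = shuffle(X, Y, Mask);
  Inserted[1] = X[3];

  // Merged form: just patch the mask. X lanes use offset 0, Y lanes 4.
  std::vector<int> NewMask = Mask;
  NewMask[1] = /*ElementOffset=*/0 + /*ExtractIndex=*/3;

  assert(shuffle(X, Y, NewMask) == Inserted);
}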
+SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); + SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); - SDValue LegalShuffle = - TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, - Y, NewMask, DAG); - if (LegalShuffle) - return LegalShuffle; - } + auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec); + if (!SVN || !Vec.hasOneUse()) + return SDValue(); + + ArrayRef<int> Mask = SVN->getMask(); + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + SmallVector<int, 16> NewMask(Mask); + if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) { + SDValue LegalShuffle = TLI.buildLegalVectorShuffle( + Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } - // insert_vector_elt V, (bitcast X from vector type), IdxC --> - // bitcast(shuffle (bitcast V), (extended X), Mask) - // Note: We do not use an insert_subvector node because that requires a - // legal subvector type. + return SDValue(); +} + +// Convert a disguised subvector insertion into a shuffle: +// insert_vector_elt V, (bitcast X from vector type), IdxC --> +// bitcast(shuffle (bitcast V), (extended X), Mask) +// Note: We do not use an insert_subvector node because that requires a +// legal subvector type. +SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); + SDValue InsertVal = N->getOperand(1); + if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -19517,13 +20206,8 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (!IndexC) { // If this is variable insert to undef vector, it might be better to splat: // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > - if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, InVal); - - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); - return DAG.getBuildVector(VT, DL, Ops); - } + if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) + return DAG.getSplat(VT, DL, InVal); return SDValue(); } @@ -19535,9 +20219,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); - if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) - return Shuf; - // Handle <1 x ???> vector insertion special cases. if (NumElts == 1) { // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y @@ -19567,6 +20248,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } + if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt)) + return Shuf; + + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) + return Shuf; + // Attempt to convert an insert_vector_elt chain into a legal build_vector. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // vXi1 vector - we don't need to recurse. @@ -19636,9 +20323,52 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { continue; } + // VECTOR_SHUFFLE - if all the operands match the shuffle's sources, + // update the shuffle mask (and second operand if we started with unary + // shuffle) and create a new legal shuffle. 
+ if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) { + auto *SVN = cast<ShuffleVectorSDNode>(CurVec); + SDValue LHS = SVN->getOperand(0); + SDValue RHS = SVN->getOperand(1); + SmallVector<int, 16> Mask(SVN->getMask()); + bool Merged = true; + for (auto I : enumerate(Ops)) { + SDValue &Op = I.value(); + if (Op) { + SmallVector<int, 16> NewMask; + if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) { + Merged = false; + break; + } + Mask = std::move(NewMask); + } + } + if (Merged) + if (SDValue NewShuffle = + TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG)) + return NewShuffle; + } + // Failed to find a match in the chain - bail. break; } + + // See if we can fill in the missing constant elements as zeros. + // TODO: Should we do this for any constant? + APInt DemandedZeroElts = APInt::getZero(NumElts); + for (unsigned I = 0; I != NumElts; ++I) + if (!Ops[I]) + DemandedZeroElts.setBit(I); + + if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) { + SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT) + : DAG.getConstantFP(0, DL, MaxEltVT); + for (unsigned I = 0; I != NumElts; ++I) + if (!Ops[I]) + Ops[I] = Zero; + + return CanonicalizeBuildVector(Ops); + } } return SDValue(); @@ -19679,7 +20409,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } - bool IsFast = false; + unsigned IsFast = 0; if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, OriginalLoad->getAddressSpace(), Alignment, OriginalLoad->getMemOperand()->getFlags(), @@ -19757,6 +20487,168 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, return SDValue(); } +// Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract, +// recursively analyse all of it's users. and try to model themselves as +// bit sequence extractions. If all of them agree on the new, narrower element +// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that +// new element type, do so now. +// This is mainly useful to recover from legalization that scalarized +// the vector as wide elements, but tries to rebuild it with narrower elements. +// +// Some more nodes could be modelled if that helps cover interesting patterns. +bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts( + SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may cause legalizaton cycles. + if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) + return false; + + // TODO: Add support for big-endian. + if (DAG.getDataLayout().isBigEndian()) + return false; + + SDValue VecOp = N->getOperand(0); + EVT VecVT = VecOp.getValueType(); + assert(!VecVT.isScalableVector() && "Only for fixed vectors."); + + // We must start with a constant extraction index. + auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!IndexC) + return false; + + assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() && + "Original ISD::EXTRACT_VECTOR_ELT is undefinend?"); + + // TODO: deal with the case of implicit anyext of the extraction. + unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); + EVT ScalarVT = N->getValueType(0); + if (VecVT.getScalarType() != ScalarVT) + return false; + + // TODO: deal with the cases other than everything being integer-typed. 
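The refineExtractVectorEltIntoMultipleNarrowExtractVectorElts combine above treats an extract_vector_elt as a plain bit-sequence extraction and re-expresses its trunc/srl users as extractions of a narrower element type. The bookkeeping is: TRUNCATE keeps the bit position but shrinks the width, SRL by a constant advances the position and shrinks the width, and a leaf of width W at position P becomes lane P / W of the vector re-bitcast to W-bit elements. A standalone sketch of that arithmetic for a little-endian v2i64 -> v4i32 case (illustrative only):

#include <cassert>
#include <cstdint>

struct BitSlice {
  unsigned BitPos;  // position inside the whole vector value
  unsigned NumBits; // width of the slice
};

BitSlice truncTo(BitSlice S, unsigned Bits) { return {S.BitPos, Bits}; }
BitSlice srlBy(BitSlice S, unsigned Amt) {
  return {S.BitPos + Amt, S.NumBits - Amt};
}

int main() {
  // extract_vector_elt(v2i64 V, 1) covers bits [64, 128).
  BitSlice Root{/*BitPos=*/64 * 1, /*NumBits=*/64};

  // trunc i64 -> i32 of that extract: bits [64, 96).
  BitSlice Lo = truncTo(Root, 32);
  // trunc(srl extract, 32): bits [96, 128).
  BitSlice Hi = truncTo(srlBy(Root, 32), 32);

  // Both leafs are 32 bits wide and 32-bit aligned, so after bitcasting
  // V to v4i32 they become ordinary lane extracts:
  assert(Lo.BitPos / 32 == 2 && Hi.BitPos / 32 == 3);
}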
+ if (!ScalarVT.isScalarInteger()) + return false; + + struct Entry { + SDNode *Producer; + + // Which bits of VecOp does it contain? + unsigned BitPos; + int NumBits; + // NOTE: the actual width of \p Producer may be wider than NumBits! + + Entry(Entry &&) = default; + Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_) + : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {} + + Entry() = delete; + Entry(const Entry &) = delete; + Entry &operator=(const Entry &) = delete; + Entry &operator=(Entry &&) = delete; + }; + SmallVector<Entry, 32> Worklist; + SmallVector<Entry, 32> Leafs; + + // We start at the "root" ISD::EXTRACT_VECTOR_ELT. + Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(), + /*NumBits=*/VecEltBitWidth); + + while (!Worklist.empty()) { + Entry E = Worklist.pop_back_val(); + // Does the node not even use any of the VecOp bits? + if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() && + E.BitPos + E.NumBits <= VecVT.getSizeInBits())) + return false; // Let's allow the other combines clean this up first. + // Did we fail to model any of the users of the Producer? + bool ProducerIsLeaf = false; + // Look at each user of this Producer. + for (SDNode *User : E.Producer->uses()) { + switch (User->getOpcode()) { + // TODO: support ISD::BITCAST + // TODO: support ISD::ANY_EXTEND + // TODO: support ISD::ZERO_EXTEND + // TODO: support ISD::SIGN_EXTEND + case ISD::TRUNCATE: + // Truncation simply means we keep position, but extract less bits. + Worklist.emplace_back(User, E.BitPos, + /*NumBits=*/User->getValueSizeInBits(0)); + break; + // TODO: support ISD::SRA + // TODO: support ISD::SHL + case ISD::SRL: + // We should be shifting the Producer by a constant amount. + if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1)); + User->getOperand(0).getNode() == E.Producer && ShAmtC) { + // Logical right-shift means that we start extraction later, + // but stop it at the same position we did previously. + unsigned ShAmt = ShAmtC->getZExtValue(); + Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt); + break; + } + [[fallthrough]]; + default: + // We can not model this user of the Producer. + // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT. + ProducerIsLeaf = true; + // Profitability check: all users that we can not model + // must be ISD::BUILD_VECTOR's. + if (User->getOpcode() != ISD::BUILD_VECTOR) + return false; + break; + } + } + if (ProducerIsLeaf) + Leafs.emplace_back(std::move(E)); + } + + unsigned NewVecEltBitWidth = Leafs.front().NumBits; + + // If we are still at the same element granularity, give up, + if (NewVecEltBitWidth == VecEltBitWidth) + return false; + + // The vector width must be a multiple of the new element width. + if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0) + return false; + + // All leafs must agree on the new element width. + // All leafs must not expect any "padding" bits ontop of that width. + // All leafs must start extraction from multiple of that width. 
+ if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) { + return (unsigned)E.NumBits == NewVecEltBitWidth && + E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth && + E.BitPos % NewVecEltBitWidth == 0; + })) + return false; + + EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT, + VecVT.getSizeInBits() / NewVecEltBitWidth); + + if (LegalTypes && + !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT))) + return false; + + if (LegalOperations && + !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT))) + return false; + + SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp); + for (const Entry &E : Leafs) { + SDLoc DL(E.Producer); + unsigned NewIndex = E.BitPos / NewVecEltBitWidth; + assert(NewIndex < NewVecVT.getVectorNumElements() && + "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?"); + SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp, + DAG.getVectorIdxConstant(NewIndex, DL)); + CombineTo(E.Producer, V); + } + + return true; +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue VecOp = N->getOperand(0); SDValue Index = N->getOperand(1); @@ -19800,6 +20692,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); + // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx + if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) { + return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, + VecOp.getOperand(0), Index)); + } + // extract_vector_elt (build_vector x, y), 1 -> y if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || VecOp.getOpcode() == ISD::SPLAT_VECTOR) && @@ -19845,7 +20743,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { unsigned BCTruncElt = IsLE ? 0 : NumElts - 1; SDValue BCSrc = VecOp.getOperand(0); if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) - return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc); + return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT); if (LegalTypes && BCSrc.getValueType().isInteger() && BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { @@ -19945,6 +20843,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N)) + return SDValue(N, 0); + // Everything under here is trying to match an extract of a loaded value. // If the result of load has to be truncated, then it's not necessarily // profitable. 
@@ -20186,7 +21087,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { // Simplify (build_vec (trunc $1) // (trunc (srl $1 half-width)) -// (trunc (srl $1 (2 * half-width))) …) +// (trunc (srl $1 (2 * half-width)))) // to (bitcast $1) SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); @@ -20339,6 +21240,29 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); ConcatOps[0] = VecIn2; VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) { + if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) || + !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2)) + return SDValue(); + // If dest vector has less than two elements, then use shuffle and extract + // from larger regs will cost even more. + if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode()) + return SDValue(); + assert(InVT2Size <= InVT1Size && + "Second input is not going to be larger than the first one."); + + // VecIn1 is wider than the output, and we have another, possibly + // smaller input. Pad the smaller input with undefs, shuffle at the + // input vector width, and extract the output. + // The shuffle type is different than VT, so check legality again. + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) + return SDValue(); + + if (InVT1 != InVT2) { + VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, + DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } + ShuffleNumElems = InVT1Size / VTSize * NumElems; } else { // TODO: Support cases where the length mismatch isn't exactly by a // factor of 2. @@ -20779,6 +21703,127 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { VT, In); } +// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND, +// and all other elements being constant zero's, granularize the BUILD_VECTOR's +// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op. +// This patten can appear during legalization. +// +// NOTE: This can be generalized to allow more than a single +// non-constant-zero op, UNDEF's, and to be KnownBits-based, +SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) { + // Don't run this after legalization. Targets may have other preferences. + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // FIXME: support big-endian. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?"); + + EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + + if (!TLI.isTypeLegal(OpIntVT) || + (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT))) + return SDValue(); + + unsigned EltBitwidth = VT.getScalarSizeInBits(); + // NOTE: the actual width of operands may be wider than that! + + // Analyze all operands of this BUILD_VECTOR. What is the largest number of + // active bits they all have? We'll want to truncate them all to that width. + unsigned ActiveBits = 0; + APInt KnownZeroOps(VT.getVectorNumElements(), 0); + for (auto I : enumerate(N->ops())) { + SDValue Op = I.value(); + // FIXME: support UNDEF elements? 
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) { + unsigned OpActiveBits = + Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits(); + if (OpActiveBits == 0) { + KnownZeroOps.setBit(I.index()); + continue; + } + // Profitability check: don't allow non-zero constant operands. + return SDValue(); + } + // Profitability check: there must only be a single non-zero operand, + // and it must be the first operand of the BUILD_VECTOR. + if (I.index() != 0) + return SDValue(); + // The operand must be a zero-extension itself. + // FIXME: this could be generalized to known leading zeros check. + if (Op.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + unsigned CurrActiveBits = + Op.getOperand(0).getValueSizeInBits().getFixedValue(); + assert(!ActiveBits && "Already encountered non-constant-zero operand?"); + ActiveBits = CurrActiveBits; + // We want to at least halve the element size. + if (2 * ActiveBits > EltBitwidth) + return SDValue(); + } + + // This BUILD_VECTOR must have at least one non-constant-zero operand. + if (ActiveBits == 0) + return SDValue(); + + // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits, + // into how many chunks can we split our element width? + EVT NewScalarIntVT, NewIntVT; + std::optional<unsigned> Factor; + // We can split the element into at least two chunks, but not into more + // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor + // for which the element width is a multiple of it, + // and the resulting types/operations on that chunk width are legal. + assert(2 * ActiveBits <= EltBitwidth && + "We know that half or less bits of the element are active."); + for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) { + if (EltBitwidth % Scale != 0) + continue; + unsigned ChunkBitwidth = EltBitwidth / Scale; + assert(ChunkBitwidth >= ActiveBits && "As per starting point."); + NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth); + NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT, + Scale * N->getNumOperands()); + if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) || + (LegalOperations && + !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) && + TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT)))) + continue; + Factor = Scale; + break; + } + if (!Factor) + return SDValue(); + + SDLoc DL(N); + SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT); + + // Recreate the BUILD_VECTOR, with elements now being Factor times smaller. 
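convertBuildVecZextToBuildVecWithZeros only fires when the single non-zero operand is a zero-extension occupying at most half of the element, so each element can be split into Factor chunks in which every chunk except the first of the first element is a known zero. The little-endian byte-level equivalence behind the recreation that follows, sketched with plain integers (illustrative, and the check itself assumes a little-endian host, matching the combine's LE-only guard):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // build_vector v2i64 (zext i32 x to i64), 0
  uint32_t x = 0x12345678;
  uint64_t Wide[2] = {(uint64_t)x, 0};

  // Granularized form: build_vector v4i32 x, 0, 0, 0, bitcast to v2i64.
  uint32_t Narrow[4] = {x, 0, 0, 0};

  // On a little-endian target both describe the same 16 bytes, so the
  // wide BUILD_VECTOR (and its ZERO_EXTEND) can be replaced.
  assert(std::memcmp(Wide, Narrow, sizeof(Wide)) == 0);
}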
+ SmallVector<SDValue, 16> NewOps; + NewOps.reserve(NewIntVT.getVectorNumElements()); + for (auto I : enumerate(N->ops())) { + SDValue Op = I.value(); + assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here."); + unsigned SrcOpIdx = I.index(); + if (KnownZeroOps[SrcOpIdx]) { + NewOps.append(*Factor, ZeroOp); + continue; + } + Op = DAG.getBitcast(OpIntVT, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op); + NewOps.emplace_back(Op); + NewOps.append(*Factor - 1, ZeroOp); + } + assert(NewOps.size() == NewIntVT.getVectorNumElements()); + SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps); + NewBV = DAG.getBitcast(VT, NewBV); + return NewBV; +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -20844,6 +21889,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = convertBuildVecZextToZext(N)) return V; + if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N)) + return V; + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; @@ -21104,6 +22152,109 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(CastOpcode, DL, VT, NewConcat); } +// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of +// the operands is a SHUFFLE_VECTOR, and all other operands are also operands +// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR. +static SDValue combineConcatVectorOfShuffleAndItsOperands( + SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, + bool LegalOperations) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + if (VT.isScalableVector()) + return SDValue(); + + // For now, only allow simple 2-operand concatenations. + if (N->getNumOperands() != 2) + return SDValue(); + + // Don't create illegal types/shuffles when not allowed to. + if ((LegalTypes && !TLI.isTypeLegal(VT)) || + (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) + return SDValue(); + + // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them, + // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us, + // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR, + // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!). + // (4) and for now, the SHUFFLE_VECTOR must be unary. + ShuffleVectorSDNode *SVN = nullptr; + for (SDValue Op : N->ops()) { + if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op); + CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) && + all_of(N->ops(), [CurSVN](SDValue Op) { + // FIXME: can we allow UNDEF operands? + return !Op.isUndef() && + (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op)); + })) { + SVN = CurSVN; + break; + } + } + if (!SVN) + return SDValue(); + + // We are going to pad the shuffle operands, so any indice, that was picking + // from the second operand, must be adjusted. + SmallVector<int, 16> AdjustedMask; + AdjustedMask.reserve(SVN->getMask().size()); + assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!"); + append_range(AdjustedMask, SVN->getMask()); + + // Identity masks for the operands of the (padded) shuffle. 
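The combineConcatVectorOfShuffleAndItsOperands combine being added here turns concat_vectors(shuffle(X, undef, M), X) into a single shuffle of the widened operands: the first half of the new mask reuses M and the other halves get identity masks selecting the original lanes. A standalone mask sketch with v2 halves producing a v4 result (illustrative helpers):

#include <array>
#include <cassert>
#include <vector>

int main() {
  std::array<int, 2> X{10, 11};
  std::array<int, 4> XPadded{10, 11, 0, 0}; // X widened with undef (here 0).

  // concat_vectors(shuffle(X, undef, {1, 0}), X)
  std::array<int, 4> Concat{X[1], X[0], X[0], X[1]};

  // Combined: one v4 shuffle of the padded operand. First half reuses the
  // inner mask, second half is the identity mask over X's lanes.
  std::vector<int> Mask{1, 0, 0, 1};
  std::array<int, 4> Wide{};
  for (int i = 0; i < 4; ++i)
    Wide[i] = XPadded[Mask[i]];

  assert(Wide == Concat);
}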
+ SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements()); + MutableArrayRef<int> FirstShufOpIdentityMask = + MutableArrayRef<int>(IdentityMask) + .take_front(OpVT.getVectorNumElements()); + MutableArrayRef<int> SecondShufOpIdentityMask = + MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements()); + std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0); + std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(), + VT.getVectorNumElements()); + + // New combined shuffle mask. + SmallVector<int, 32> Mask; + Mask.reserve(VT.getVectorNumElements()); + for (SDValue Op : N->ops()) { + assert(!Op.isUndef() && "Not expecting to concatenate UNDEF."); + if (Op.getNode() == SVN) { + append_range(Mask, AdjustedMask); + continue; + } + if (Op == SVN->getOperand(0)) { + append_range(Mask, FirstShufOpIdentityMask); + continue; + } + if (Op == SVN->getOperand(1)) { + append_range(Mask, SecondShufOpIdentityMask); + continue; + } + llvm_unreachable("Unexpected operand!"); + } + + // Don't create illegal shuffle masks. + if (!TLI.isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + // Pad the shuffle operands with UNDEF. + SDLoc dl(N); + std::array<SDValue, 2> ShufOps; + for (auto I : zip(SVN->ops(), ShufOps)) { + SDValue ShufOp = std::get<0>(I); + SDValue &NewShufOp = std::get<1>(I); + if (ShufOp.isUndef()) + NewShufOp = DAG.getUNDEF(VT); + else { + SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(), + DAG.getUNDEF(OpVT)); + ShufOpParts[0] = ShufOp; + NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts); + } + } + // Finally, create the new wide shuffle. + return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -21239,6 +22390,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfCasts(N, DAG)) return V; + if (SDValue V = combineConcatVectorOfShuffleAndItsOperands( + N, DAG, TLI, LegalTypes, LegalOperations)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR // operands and look for a CONCAT operations that place the incoming vectors @@ -21516,7 +22671,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()); MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize); } else - MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(), + MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(), StoreSize); SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); @@ -22076,14 +23231,53 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } +// Match shuffles that can be converted to *_vector_extend_in_reg. +// This is often generated during legalization. +// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)), +// and returns the EVT to which the extension should be performed. +// NOTE: this assumes that the src is the first operand of the shuffle. 
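The canCombineShuffleToExtendVectorInreg helper introduced above checks, for each candidate Scale, that every defined mask index i satisfies Mask[i] == i / Scale when i % Scale == 0 (the lanes in between may be undef), e.g. <0, u, 1, u> on v4i32 matches Scale 2 and becomes a v2i64 any_extend_vector_inreg. A small standalone version of that predicate:

#include <cassert>
#include <vector>

// Returns true if Mask describes an in-register extension by Scale: lane k
// of the source lands in lane k*Scale of the result and the remaining lanes
// are undef (-1). Mirrors the shape of the isAnyExtend check above.
bool isAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  for (unsigned i = 0; i != Mask.size(); ++i) {
    if (Mask[i] < 0)
      continue; // undef lane
    if ((i % Scale) == 0 && (unsigned)Mask[i] == i / Scale)
      continue;
    return false;
  }
  return true;
}

int main() {
  assert(isAnyExtendMask({0, -1, 1, -1}, 2));  // v4i32 -> v2i64 anyext
  assert(!isAnyExtendMask({0, -1, 2, -1}, 2)); // wrong source lane
}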
+static std::optional<EVT> canCombineShuffleToExtendVectorInreg( + unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match, + SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, + bool LegalOperations) { + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + + // TODO Add support for big-endian when we have a test case. + if (!VT.isInteger() || IsBigEndian) + return std::nullopt; + + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for + // power-of-2 extensions as they are the most likely. + // FIXME: should try Scale == NumElts case too, + for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + // The vector width must be a multiple of Scale. + if (NumElts % Scale != 0) + continue; + + EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); + + if ((LegalTypes && !TLI.isTypeLegal(OutVT)) || + (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT))) + continue; + + if (Match(Scale)) + return OutVT; + } + + return std::nullopt; +} + // Match shuffles that can be converted to any_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) -// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. -static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, - SelectionDAG &DAG, - const TargetLowering &TLI, - bool LegalOperations) { +static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -22091,13 +23285,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, if (!VT.isInteger() || IsBigEndian) return SDValue(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned EltSizeInBits = VT.getScalarSizeInBits(); - ArrayRef<int> Mask = SVN->getMask(); - SDValue N0 = SVN->getOperand(0); - // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) - auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { + auto isAnyExtend = [NumElts = VT.getVectorNumElements(), + Mask = SVN->getMask()](unsigned Scale) { for (unsigned i = 0; i != NumElts; ++i) { if (Mask[i] < 0) continue; @@ -22108,27 +23298,138 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, return true; }; - // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for - // power-of-2 extensions as they are the most likely. - for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { - // Check for non power of 2 vector sizes - if (NumElts % Scale != 0) - continue; - if (!isAnyExtend(Scale)) - continue; + unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG; + SDValue N0 = SVN->getOperand(0); + // Never create an illegal type. Only create unsupported operations if we + // are pre-legalization. + std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg( + Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations); + if (!OutVT) + return SDValue(); + return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0)); +} - EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); - EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); - // Never create an illegal type. 
Only create unsupported operations if we - // are pre-legalization. - if (TLI.isTypeLegal(OutVT)) - if (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) - return DAG.getBitcast(VT, - DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, - SDLoc(SVN), OutVT, N0)); - } +// Match shuffles that can be converted to zero_extend_vector_inreg. +// This is often generated during legalization. +// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src)) +static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + bool LegalTypes = true; + EVT VT = SVN->getValueType(0); + assert(!VT.isScalableVector() && "Encountered scalable shuffle?"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + // TODO: add support for big-endian when we have a test case. + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + if (!VT.isInteger() || IsBigEndian) + return SDValue(); + + SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end()); + auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) { + for (int &Indice : Mask) { + if (Indice < 0) + continue; + int OpIdx = (unsigned)Indice < NumElts ? 0 : 1; + int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts; + Fn(Indice, OpIdx, OpEltIdx); + } + }; + + // Which elements of which operand does this shuffle demand? + std::array<APInt, 2> OpsDemandedElts; + for (APInt &OpDemandedElts : OpsDemandedElts) + OpDemandedElts = APInt::getZero(NumElts); + ForEachDecomposedIndice( + [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) { + OpsDemandedElts[OpIdx].setBit(OpEltIdx); + }); + + // Element-wise(!), which of these demanded elements are know to be zero? + std::array<APInt, 2> OpsKnownZeroElts; + for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts)) + std::get<2>(I) = + DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I)); + + // Manifest zeroable element knowledge in the shuffle mask. + // NOTE: we don't have 'zeroable' sentinel value in generic DAG, + // this is a local invention, but it won't leak into DAG. + // FIXME: should we not manifest them, but just check when matching? + bool HadZeroableElts = false; + ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts]( + int &Indice, int OpIdx, int OpEltIdx) { + if (OpsKnownZeroElts[OpIdx][OpEltIdx]) { + Indice = -2; // Zeroable element. + HadZeroableElts = true; + } + }); + + // Don't proceed unless we've refined at least one zeroable mask indice. + // If we didn't, then we are still trying to match the same shuffle mask + // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG, + // and evidently failed. Proceeding will lead to endless combine loops. + if (!HadZeroableElts) + return SDValue(); + + // The shuffle may be more fine-grained than we want. Widen elements first. + // FIXME: should we do this before manifesting zeroable shuffle mask indices? 
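The zero-extension matcher in this hunk first marks mask lanes whose source element is provably zero with a local sentinel (-2), then, after widening, requires every Scale-sized chunk to start with the next source lane and contain only zeroable lanes otherwise, so <0, z, 1, z> matches but <z, z, 1, u> does not. A standalone sketch of that chunk check, using -1 for undef and -2 for "known zero" as described above:

#include <cassert>
#include <vector>

// Mirrors the shape of the isZeroExtend predicate: chunk k must start with
// source lane k and every other lane in the chunk must be a known zero
// (-2). Undef (-1) is deliberately rejected, as in the original.
bool isZeroExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  for (unsigned Chunk = 0; Chunk * Scale < Mask.size(); ++Chunk) {
    if (Mask[Chunk * Scale] != (int)Chunk)
      return false;
    for (unsigned i = 1; i != Scale; ++i)
      if (Mask[Chunk * Scale + i] != -2)
        return false;
  }
  return true;
}

int main() {
  assert(isZeroExtendMask({0, -2, 1, -2}, 2));   // v4i32 -> v2i64 zext
  assert(!isZeroExtendMask({-2, -2, 1, -2}, 2)); // chunk 0 lost its lane
  assert(!isZeroExtendMask({0, -1, 1, -2}, 2));  // undef isn't "zero"
}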
+ SmallVector<int, 16> ScaledMask; + getShuffleMaskWithWidestElts(Mask, ScaledMask); + assert(Mask.size() >= ScaledMask.size() && + Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening."); + int Prescale = Mask.size() / ScaledMask.size(); + + NumElts = ScaledMask.size(); + EltSizeInBits *= Prescale; + + EVT PrescaledVT = EVT::getVectorVT( + *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits), + NumElts); + + if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT)) + return SDValue(); + + // For example, + // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32)) + // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types) + auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) { + assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 && + "Unexpected mask scaling factor."); + ArrayRef<int> Mask = ScaledMask; + for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale; + SrcElt != NumSrcElts; ++SrcElt) { + // Analyze the shuffle mask in Scale-sized chunks. + ArrayRef<int> MaskChunk = Mask.take_front(Scale); + assert(MaskChunk.size() == Scale && "Unexpected mask size."); + Mask = Mask.drop_front(MaskChunk.size()); + // The first indice in this chunk must be SrcElt, but not zero! + // FIXME: undef should be fine, but that results in more-defined result. + if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt) + return false; + // The rest of the indices in this chunk must be zeros. + // FIXME: undef should be fine, but that results in more-defined result. + if (!all_of(MaskChunk.drop_front(1), + [](int Indice) { return Indice == -2; })) + return false; + } + assert(Mask.empty() && "Did not process the whole mask?"); + return true; + }; + unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG; + for (bool Commuted : {false, true}) { + SDValue Op = SVN->getOperand(!Commuted ? 0 : 1); + if (Commuted) + ShuffleVectorSDNode::commuteMask(ScaledMask); + std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg( + Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes, + LegalOperations); + if (OutVT) + return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, + DAG.getBitcast(PrescaledVT, Op))); + } return SDValue(); } @@ -22200,9 +23501,52 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, // the masks of the shuffles. static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { + EVT VT = Shuf->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + if (!Shuf->getOperand(1).isUndef()) return SDValue(); + // See if this unary non-splat shuffle actually *is* a splat shuffle, + // in disguise, with all demanded elements being identical. + // FIXME: this can be done per-operand. + if (!Shuf->isSplat()) { + APInt DemandedElts(NumElts, 0); + for (int Idx : Shuf->getMask()) { + if (Idx < 0) + continue; // Ignore sentinel indices. + assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?"); + DemandedElts.setBit(Idx); + } + assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?"); + APInt UndefElts; + if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) { + // Even if all demanded elements are splat, some of them could be undef. + // Which lowest demanded element is *not* known-undef? + std::optional<unsigned> MinNonUndefIdx; + for (int Idx : Shuf->getMask()) { + if (Idx < 0 || UndefElts[Idx]) + continue; // Ignore sentinel indices, and undef elements. 
+ MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U)); + } + if (!MinNonUndefIdx) + return DAG.getUNDEF(VT); // All undef - result is undef. + assert(*MinNonUndefIdx < NumElts && "Expected valid element index."); + SmallVector<int, 8> SplatMask(Shuf->getMask().begin(), + Shuf->getMask().end()); + for (int &Idx : SplatMask) { + if (Idx < 0) + continue; // Passthrough sentinel indices. + // Otherwise, just pick the lowest demanded non-undef element. + // Or sentinel undef, if we know we'd pick a known-undef element. + Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx; + } + assert(SplatMask != Shuf->getMask() && "Expected mask to change!"); + return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0), + Shuf->getOperand(1), SplatMask); + } + } + // If the inner operand is a known splat with no undefs, just return that directly. // TODO: Create DemandedElts mask from Shuf's mask. // TODO: Allow undef elements and merge with the shuffle code below. @@ -22386,7 +23730,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, // First, check if we are taking one element of a vector and shuffling that // element into another vector. ArrayRef<int> Mask = Shuf->getMask(); - SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); + SmallVector<int, 16> CommutedMask(Mask); SDValue Op0 = Shuf->getOperand(0); SDValue Op1 = Shuf->getOperand(1); int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); @@ -22540,6 +23884,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2))) if (Idx->getAPIntValue() == SplatIndex) return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1)); + + // Look through a bitcast if LE and splatting lane 0, through to a + // scalar_to_vector or a build_vector. + if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() && + SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() && + (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR || + N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) { + EVT N00VT = N0.getOperand(0).getValueType(); + if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() && + VT.isInteger() && N00VT.isInteger()) { + EVT InVT = + TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType()); + SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), + SDLoc(N), InVT); + return DAG.getSplatBuildVector(VT, SDLoc(N), Op); + } + } } // If this is a bit convert that changes the element type of the vector but @@ -22600,7 +23961,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return ShufOp; // Match shuffles that can be converted to any_vector_extend_in_reg. - if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + if (SDValue V = + combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations)) return V; // Combine "truncate_vector_in_reg" style shuffles. @@ -22697,7 +24059,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) return InsertN1; if (N0.getOpcode() == ISD::CONCAT_VECTORS) { - SmallVector<int> CommuteMask(Mask.begin(), Mask.end()); + SmallVector<int> CommuteMask(Mask); ShuffleVectorSDNode::commuteMask(CommuteMask); if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) return InsertN0; @@ -23086,55 +24448,101 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) return V; + // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG. 
+ // Perform this really late, because it could eliminate knowledge + // of undef elements created by this shuffle. + if (Level < AfterLegalizeTypes) + if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI, + LegalOperations)) + return V; + return SDValue(); } SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { - SDValue InVal = N->getOperand(0); EVT VT = N->getValueType(0); + if (!VT.isFixedLengthVector()) + return SDValue(); + + // Try to convert a scalar binop with an extracted vector element to a vector + // binop. This is intended to reduce potentially expensive register moves. + // TODO: Check if both operands are extracted. + // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT(). + SDValue Scalar = N->getOperand(0); + unsigned Opcode = Scalar.getOpcode(); + EVT VecEltVT = VT.getScalarType(); + if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 && + TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT && + Scalar.getOperand(0).getValueType() == VecEltVT && + Scalar.getOperand(1).getValueType() == VecEltVT && + DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) { + // Match an extract element and get a shuffle mask equivalent. + SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1); + + for (int i : {0, 1}) { + // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...} + // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...} + SDValue EE = Scalar.getOperand(i); + auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1)); + if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + EE.getOperand(0).getValueType() == VT && + isa<ConstantSDNode>(EE.getOperand(1))) { + // Mask = {ExtractIndex, undef, undef....} + ShufMask[0] = EE.getConstantOperandVal(1); + // Make sure the shuffle is legal if we are crossing lanes. + if (TLI.isShuffleMaskLegal(ShufMask, VT)) { + SDLoc DL(N); + SDValue V[] = {EE.getOperand(0), + DAG.getConstant(C->getAPIntValue(), DL, VT)}; + SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]); + return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT), + ShufMask); + } + } + } + } // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern // with a VECTOR_SHUFFLE and possible truncate. - if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - VT.isFixedLengthVector() && - InVal->getOperand(0).getValueType().isFixedLengthVector()) { - SDValue InVec = InVal->getOperand(0); - SDValue EltNo = InVal->getOperand(1); - auto InVecT = InVec.getValueType(); - if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) { - SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); - int Elt = C0->getZExtValue(); - NewMask[0] = Elt; - // If we have an implict truncate do truncate here as long as it's legal. - // if it's not legal, this should - if (VT.getScalarType() != InVal.getValueType() && - InVal.getValueType().isScalarInteger() && - isTypeLegal(VT.getScalarType())) { - SDValue Val = - DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); - } - if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { - SDValue LegalShuffle = - TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask, DAG); - if (LegalShuffle) { - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return LegalShuffle; - // If not we must truncate the vector. 
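
The splat-in-disguise rewrite a few hunks back (in combineShuffleOfSplatVal) is another piece that can be sketched with plain indices: when every demanded source lane of a unary shuffle is known to carry the same value, retarget all defined lanes at the lowest demanded lane that is not known-undef. A standalone approximation (isUndefSrcElt stands in for the UndefElts bits returned by isSplatValue):

#include <algorithm>
#include <functional>
#include <optional>
#include <vector>

// Rewrites a unary shuffle mask whose demanded source lanes all carry the
// same value so that every defined lane reads one canonical source element.
// Returns std::nullopt when every demanded lane is known-undef.
std::optional<std::vector<int>>
canonicalizeDisguisedSplat(std::vector<int> Mask,
                           const std::function<bool(int)> &isUndefSrcElt) {
  // Lowest demanded source element that is not known-undef.
  std::optional<unsigned> MinNonUndef;
  for (int Idx : Mask) {
    if (Idx < 0 || isUndefSrcElt(Idx))
      continue;
    MinNonUndef = std::min<unsigned>(Idx, MinNonUndef.value_or(~0u));
  }
  if (!MinNonUndef)
    return std::nullopt; // all demanded lanes undef -> whole result is undef
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;                                          // keep sentinels
    Idx = isUndefSrcElt(Idx) ? -1 : (int)*MinNonUndef;   // pick one lane
  }
  return Mask;
}

Returning std::nullopt corresponds to the all-undef case above, where the whole shuffle folds to getUNDEF.
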
- if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); - EVT SubVT = EVT::getVectorVT(*DAG.getContext(), - InVecT.getVectorElementType(), - VT.getVectorNumElements()); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, - LegalShuffle, ZeroIdx); - } - } - } + if (Opcode != ISD::EXTRACT_VECTOR_ELT || + !Scalar.getOperand(0).getValueType().isFixedLengthVector()) + return SDValue(); + + // If we have an implicit truncate, truncate here if it is legal. + if (VecEltVT != Scalar.getValueType() && + Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) { + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); + } + + auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1)); + if (!ExtIndexC) + return SDValue(); + + SDValue SrcVec = Scalar.getOperand(0); + EVT SrcVT = SrcVec.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + unsigned VTNumElts = VT.getVectorNumElements(); + if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) { + // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...} + SmallVector<int, 8> Mask(SrcNumElts, -1); + Mask[0] = ExtIndexC->getZExtValue(); + SDValue LegalShuffle = TLI.buildLegalVectorShuffle( + SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG); + if (!LegalShuffle) + return SDValue(); + + // If the initial vector is the same size, the shuffle is the result. + if (VT == SrcVT) + return LegalShuffle; + + // If not, shorten the shuffled vector. + if (VTNumElts != SrcNumElts) { + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); + EVT SubVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), VTNumElts); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle, + ZeroIdx); } } @@ -23364,6 +24772,15 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { } SDValue DAGCombiner::visitVPOp(SDNode *N) { + + if (N->getOpcode() == ISD::VP_GATHER) + if (SDValue SD = visitVPGATHER(N)) + return SD; + + if (N->getOpcode() == ISD::VP_SCATTER) + if (SDValue SD = visitVPSCATTER(N)) + return SD; + // VP operations in which all vector elements are disabled - either by // determining that the mask is all false or that the EVL is 0 - can be // eliminated. @@ -23532,10 +24949,40 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, } // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, ScalarBO); - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); - return DAG.getBuildVector(VT, DL, Ops); + return DAG.getSplat(VT, DL, ScalarBO); +} + +/// Visit a vector cast operation, like FP_EXTEND. +SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) { + EVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVCastOp only works on vectors!"); + EVT EltVT = VT.getVectorElementType(); + unsigned Opcode = N->getOpcode(); + + SDValue N0 = N->getOperand(0); + EVT SrcVT = N0->getValueType(0); + EVT SrcEltVT = SrcVT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // TODO: promote operation might be also good here? 
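
Shape-wise, the rewritten tail of visitSCALAR_TO_VECTOR reduces to one decision: build a mask that moves the extracted lane into lane 0 of a source-width shuffle, then extract a leading subvector if the destination has fewer lanes. A small sketch of just that bookkeeping (names are illustrative; the real code additionally defers to buildLegalVectorShuffle for legality):

#include <vector>

struct S2VPlan {
  std::vector<int> ShuffleMask; // over the source vector's lane count
  bool NeedsSubvectorExtract;   // shorten to the destination lane count?
};

S2VPlan planScalarToVectorShuffle(unsigned SrcNumElts, unsigned DstNumElts,
                                  unsigned ExtIndex) {
  S2VPlan P;
  P.ShuffleMask.assign(SrcNumElts, -1); // every lane undef except lane 0
  P.ShuffleMask[0] = (int)ExtIndex;     // lane 0 reads the extracted element
  P.NeedsSubvectorExtract = (DstNumElts != SrcNumElts);
  return P;
}
// e.g. planScalarToVectorShuffle(4, 2, 3) yields mask {3, -1, -1, -1} plus an
// extract_subvector at index 0 down to two lanes, matching the flow above.
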
+ int Index0; + SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); + if (Src0 && + (N0.getOpcode() == ISD::SPLAT_VECTOR || + TLI.isExtractVecEltCheap(VT, Index0)) && + TLI.isOperationLegalOrCustom(Opcode, EltVT) && + TLI.preferScalarizeSplat(Opcode)) { + SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); + SDValue Elt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC); + SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags()); + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); + return DAG.getBuildVector(VT, DL, Ops); + } + + return SDValue(); } /// Visit a binary vector operation, like ADD. @@ -23555,9 +25002,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { // same types of operations that are in the original sequence. We do have to // restrict ops like integer div that have immediate UB (eg, div-by-zero) // though. This code is adapted from the identical transform in instcombine. - if (Opcode != ISD::UDIV && Opcode != ISD::SDIV && - Opcode != ISD::UREM && Opcode != ISD::SREM && - Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) { + if (DAG.isSafeToSpeculativelyExecute(Opcode)) { auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS); auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS); if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && @@ -23575,7 +25020,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { // demanded elements analysis. It is further limited to not change a splat // of an inserted scalar because that may be optimized better by // load-folding or other target-specific behaviors. - if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) && + if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) && Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat X), (splat C) --> splat (binop X, C) @@ -23584,7 +25029,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), Shuf0->getMask()); } - if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) && + if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) && Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat C), (splat X) --> splat (binop C, X) @@ -23657,7 +25102,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2) { - assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + assert(N0.getOpcode() == ISD::SETCC && + "First argument must be a SetCC node!"); SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -24132,7 +25578,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) return V; - // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) + // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A) // where y is has a single bit set. // A plaintext description would be, we can turn the SELECT_CC into an AND // when the condition can be materialized as an all-ones register. 
Any @@ -24583,7 +26029,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { bool IsAtomic; SDValue BasePtr; int64_t Offset; - Optional<int64_t> NumBytes; + std::optional<int64_t> NumBytes; MachineMemOperand *MMO; }; @@ -24598,21 +26044,26 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { : 0; uint64_t Size = MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); - return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + return {LSN->isVolatile(), + LSN->isAtomic(), + LSN->getBasePtr(), Offset /*base offset*/, - Optional<int64_t>(Size), + std::optional<int64_t>(Size), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) - return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), + return {false /*isVolatile*/, + /*isAtomic*/ false, + LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, - (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) - : Optional<int64_t>(), + (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize()) + : std::optional<int64_t>(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), - (int64_t)0 /*offset*/, - Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; + return {false /*isvolatile*/, + /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/, + (MachineMemOperand *)nullptr}; }; MemUseCharacteristics MUC0 = getCharacteristics(Op0), @@ -24839,13 +26290,6 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getTokenFactor(SDLoc(N), Aliases); } -namespace { -// TODO: Replace with with std::monostate when we move to C++17. -struct UnitT { } Unit; -bool operator==(const UnitT &, const UnitT &) { return true; } -bool operator!=(const UnitT &, const UnitT &) { return false; } -} // namespace - // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store @@ -24866,8 +26310,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // the common case, every store writes to the immediately previous address // space and thus merged with the previous interval at insertion time. - using IMap = - llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>; + using IMap = llvm::IntervalMap<int64_t, std::monostate, 8, + IntervalMapHalfOpenInfo<int64_t>>; IMap::Allocator A; IMap Intervals(A); @@ -24894,7 +26338,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { return false; // Add ST's interval. - Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); + Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, + std::monostate{}); while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { if (Chain->getMemoryVT().isScalableVector()) @@ -24923,7 +26368,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If there's a previous interval, we should start after it. 
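
The UnitT type deleted above existed only to give IntervalMap an empty payload with operator== and operator!=; std::monostate is the standard type with exactly those properties, so the hand-rolled version can go. A trivial self-contained illustration of std::monostate as a "no value" payload (std::map is used purely for brevity; llvm::IntervalMap needs its allocator and is not reproduced):

#include <cstdint>
#include <map>
#include <utility>
#include <variant> // std::monostate

int main() {
  // Half-open byte intervals of chained stores; only the keys matter, so the
  // mapped type is the empty, equality-comparable std::monostate.
  std::map<std::pair<int64_t, int64_t>, std::monostate> Intervals;
  Intervals.insert({{0, 8}, std::monostate{}});  // store of 8 bytes at 0
  Intervals.insert({{8, 16}, std::monostate{}}); // the next chained store
  return Intervals.size() == 2 ? 0 : 1;
}
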
if (I != Intervals.begin() && (--I).stop() <= Offset) break; - Intervals.insert(Offset, Offset + Length, Unit); + Intervals.insert(Offset, Offset + Length, std::monostate{}); ChainedStores.push_back(Chain); STChain = Chain; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ff5779967e22..2f2ae6e29855 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -42,7 +42,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -105,6 +104,7 @@ #include <cassert> #include <cstdint> #include <iterator> +#include <optional> #include <utility> using namespace llvm; @@ -319,7 +319,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) { Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } return Reg; @@ -405,11 +405,6 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } else FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); - - // Now skip past any EH_LABELs, which must remain at the beginning. - while (FuncInfo.InsertPt != FuncInfo.MBB->end() && - FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) - ++FuncInfo.InsertPt; } void FastISel::removeDeadCode(MachineBasicBlock::iterator I, @@ -696,20 +691,20 @@ bool FastISel::selectStackmap(const CallInst *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); auto Builder = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)); const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) Builder.addImm(0); // Issue STACKMAP. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::STACKMAP)); for (auto const &MO : Ops) MIB.add(MO); // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) .addImm(0) .addImm(0); @@ -878,7 +873,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { /*isImp=*/true)); // Insert the patchpoint instruction before the call generated by the target. 
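
The mechanical DbgLoc-to-MIMD churn that starts here in FastISel swaps a bare debug location for a small bundle that BuildMI can copy onto every emitted MachineInstr; besides the DebugLoc it carries instruction-level metadata (in current trees, notably the !pcsections node). A stub-level analogue of what that bundle looks like, with placeholder types rather than LLVM's:

#include <utility>

struct DebugLocStub {}; // stands in for llvm::DebugLoc
struct MDNodeStub;      // stands in for llvm::MDNode (a metadata node)

// Minimal analogue of the MIMD bundle threaded into BuildMI above.
class MIMetadataSketch {
public:
  MIMetadataSketch() = default;
  MIMetadataSketch(DebugLocStub DL, const MDNodeStub *PCSections = nullptr)
      : DL(std::move(DL)), PCSections(PCSections) {}
  const DebugLocStub &getDL() const { return DL; }
  const MDNodeStub *getPCSections() const { return PCSections; }

private:
  DebugLocStub DL;
  const MDNodeStub *PCSections = nullptr;
};
// Resetting with "MIMD = {};", as the hunks above do once an instruction has
// been selected, clears both pieces at once instead of just the DebugLoc.
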
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, MIMD, TII.get(TargetOpcode::PATCHPOINT)); for (auto &MO : Ops) @@ -907,7 +902,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) { Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), /*isDef=*/false)); MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); for (auto &MO : Ops) MIB.add(MO); @@ -928,7 +923,7 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) { Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)), /*isDef=*/false)); MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL)); for (auto &MO : Ops) MIB.add(MO); @@ -1139,9 +1134,8 @@ bool FastISel::lowerCall(const CallInst *CI) { bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(*CI, TM)) IsTailCall = false; - if (IsTailCall && MF->getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsBool()) + if (IsTailCall && !CI->isMustTailCall() && + MF->getFunction().getFnAttribute("disable-tail-calls").getValueAsBool()) IsTailCall = false; CallLoweringInfo CLI; @@ -1171,7 +1165,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_IsConvergent; ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::INLINEASM)); MIB.addExternalSymbol(IA->getAsmString().c_str()); MIB.addImm(ExtraInfo); @@ -1229,7 +1223,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) return true; - Optional<MachineOperand> Op; + std::optional<MachineOperand> Op; if (Register Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); @@ -1251,24 +1245,24 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { - assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) && "Expected inlined-at fields to agree"); - // A dbg.declare describes the address of a source variable, so lower it - // into an indirect DBG_VALUE. - auto Builder = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, - DI->getVariable(), DI->getExpression()); - - // If using instruction referencing, mutate this into a DBG_INSTR_REF, - // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto - // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - if (UseInstrRefDebugInfo && Op->isReg()) { - Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); - Builder->getOperand(1).ChangeToImmediate(0); - auto *NewExpr = - DIExpression::prepend(DI->getExpression(), DIExpression::DerefBefore); - Builder->getOperand(3).setMetadata(NewExpr); + if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { + // If using instruction referencing, produce this as a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto + // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. 
+ SmallVector<uint64_t, 3> Ops( + {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref}); + auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), + TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op, + DI->getVariable(), NewExpr); + } else { + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, + DI->getVariable(), DI->getExpression()); } } else { // We can't yet handle anything else here because it would require @@ -1283,12 +1277,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); - assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) && "Expected inlined-at fields to agree"); if (!V || isa<UndefValue>(V) || DI->hasArgList()) { // DI is either undef or cannot produce a valid DBG_VALUE, so produce an // undef DBG_VALUE to terminate any prior location. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { // See if there's an expression to constant-fold. @@ -1296,35 +1290,42 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (Expr) std::tie(Expr, CI) = Expr->constantFold(CI); if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addCImm(CI) .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(Expr); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addImm(CI->getZExtValue()) .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(Expr); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addFPImm(CF) .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (Register Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = false; - auto Builder = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, - DI->getVariable(), DI->getExpression()); - - // If using instruction referencing, mutate this into a DBG_INSTR_REF, - // to be later patched up by finalizeDebugInstrRefs. - if (UseInstrRefDebugInfo) { - Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); - Builder->getOperand(1).ChangeToImmediate(0); + if (!FuncInfo.MF->useDebugInstrRef()) { + bool IsIndirect = false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect, + Reg, DI->getVariable(), DI->getExpression()); + } else { + // If using instruction referencing, produce this as a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. 
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg( + /* Reg */ Reg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)}); + SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0}); + auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), + TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs, + DI->getVariable(), NewExpr); } } else { // We don't know how to handle other cases, so we drop. @@ -1340,7 +1341,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } @@ -1448,7 +1449,7 @@ bool FastISel::selectFreeze(const User *I) { MVT Ty = ETy.getSimpleVT(); const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty); Register ResultReg = createResultReg(TyRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg); updateValueMap(I, ResultReg); @@ -1500,7 +1501,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) return false; - DbgLoc = I->getDebugLoc(); + MIMD = MIMetadata(*I); SavedInsertPt = FuncInfo.InsertPt; @@ -1525,7 +1526,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (!SkipTargetIndependentISel) { if (selectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; - DbgLoc = DebugLoc(); + MIMD = {}; return true; } // Remove dead code. @@ -1537,7 +1538,7 @@ bool FastISel::selectInstruction(const Instruction *I) { // Next, try calling the target to attempt to handle the instruction. if (fastSelectInstruction(I)) { ++NumFastIselSuccessTarget; - DbgLoc = DebugLoc(); + MIMD = {}; return true; } // Remove dead code. @@ -1545,7 +1546,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); - DbgLoc = DebugLoc(); + MIMD = {}; // Undo phi node updates, because they will be added again by SelectionDAG. if (I->isTerminator()) { // PHI node handling may have generated local value instructions. @@ -1593,7 +1594,7 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB, FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); } - fastEmitBranch(FalseMBB, DbgLoc); + fastEmitBranch(FalseMBB, MIMD.getDL()); } /// Emit an FNeg operation. @@ -1906,7 +1907,7 @@ Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. 
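
The two instruction-referencing paths above differ only in the expression prefix: a dbg.declare operand is the variable's address, so its expression gains "arg 0, deref", while a dbg.value operand already is the value and only gains "arg 0". A symbolic sketch of that prefixing (the enum is illustrative; the real operator encodings come from llvm/BinaryFormat/Dwarf.h, and DIExpression::prependOpcodes does the splicing):

#include <initializer_list>
#include <vector>

// Symbolic stand-ins only. LLVMArg0 conflates DW_OP_LLVM_arg with its
// operand 0 for brevity.
enum class DbgExprOp { LLVMArg0, Deref /*, ...pre-existing expression ops */ };

// dbg.declare: operand #0 is an address, so the expression gains
// "arg 0, deref" to keep describing the variable's value.
std::vector<DbgExprOp> prependDeclarePrefix(std::vector<DbgExprOp> Expr) {
  Expr.insert(Expr.begin(), {DbgExprOp::LLVMArg0, DbgExprOp::Deref});
  return Expr;
}

// dbg.value: operand #0 is already the value, so only "arg 0" is prepended.
std::vector<DbgExprOp> prependValuePrefix(std::vector<DbgExprOp> Expr) {
  Expr.insert(Expr.begin(), {DbgExprOp::LLVMArg0});
  return Expr;
}
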
Register NewOp = createResultReg(RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), NewOp).addReg(Op); return NewOp; } @@ -1919,7 +1920,7 @@ Register FastISel::fastEmitInst_(unsigned MachineInstOpcode, Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg); return ResultReg; } @@ -1931,13 +1932,14 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; @@ -1953,15 +1955,16 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addReg(Op1); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -1977,17 +1980,18 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addReg(Op1) .addReg(Op2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1) .addReg(Op2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2001,15 +2005,16 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2023,17 +2028,18 @@ 
Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addImm(Imm1) .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addImm(Imm1) .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2046,13 +2052,14 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, Register ResultReg = createResultReg(RC); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addFPImm(FPImm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2067,17 +2074,18 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addReg(Op1) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2088,12 +2096,13 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2105,7 +2114,7 @@ Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0, 0, Idx); return ResultReg; } @@ -2170,9 +2179,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. 
Use the location of the operand if // there is one; otherwise no location, flushLocalValueMap will fix it. - DbgLoc = DebugLoc(); + MIMD = {}; if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) - DbgLoc = Inst->getDebugLoc(); + MIMD = MIMetadata(*Inst); Register Reg = getRegForValue(PHIOp); if (!Reg) { @@ -2180,7 +2189,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg)); - DbgLoc = DebugLoc(); + MIMD = {}; } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index aa9c77f9cabf..c18cd39ed296 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -119,10 +119,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } - if (Personality == EHPersonality::Wasm_CXX) { - WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); - calculateWasmEHInfo(&fn, EHInfo); - } // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines @@ -154,7 +150,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, (TFI->isStackRealignable() || (Alignment <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); uint64_t TySize = - MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); + MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinValue(); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. @@ -270,7 +266,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. if (BB.hasAddressTaken()) - MBB->setHasAddressTaken(); + MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB)); // Mark landing pad blocks. if (BB.isEHPad()) @@ -323,10 +319,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, const auto *BB = CME.Handler.get<const BasicBlock *>(); CME.Handler = MBBMap[BB]; } - } - - else if (Personality == EHPersonality::Wasm_CXX) { + } else if (Personality == EHPersonality::Wasm_CXX) { WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + // Map all BB references in the Wasm EH data to MBBs. DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest; for (auto &KV : EHInfo.SrcToUnwindDest) { @@ -369,8 +365,7 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { - return RegInfo->createVirtualRegister( - MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); + return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -381,8 +376,6 @@ Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { /// will assign registers for each member or element. 
/// Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { - const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); @@ -451,8 +444,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { Register DestReg = It->second; if (DestReg == 0) - return - assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg"); + return; + assert(DestReg.isVirtual() && "Expected a virtual reg"); LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -475,7 +468,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); Register SrcReg = ValueMap[V]; - if (!Register::isVirtualRegister(SrcReg)) { + if (!SrcReg.isVirtual()) { DestLOI.IsValid = false; return; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3d3b504c6abd..338172e4e10a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -70,7 +70,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) - if (Register::isPhysicalRegister(RN->getReg())) + if (RN->getReg().isPhysical()) continue; NumImpUses = N - I; break; @@ -81,9 +81,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. -void InstrEmitter:: -EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, - Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) { +void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + Register SrcReg, + DenseMap<SDValue, Register> &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! 
@@ -106,51 +106,50 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (TLI->isTypeLegal(VT)) UseRC = TLI->getRegClassFor(VT, Node->isDivergent()); - if (!IsClone && !IsCloned) - for (SDNode *User : Node->uses()) { - bool Match = true; - if (User->getOpcode() == ISD::CopyToReg && - User->getOperand(2).getNode() == Node && - User->getOperand(2).getResNo() == ResNo) { - Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (DestReg.isVirtual()) { - VRBase = DestReg; - Match = false; - } else if (DestReg != SrcReg) - Match = false; - } else { - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - SDValue Op = User->getOperand(i); - if (Op.getNode() != Node || Op.getResNo() != ResNo) - continue; - MVT VT = Node->getSimpleValueType(Op.getResNo()); - if (VT == MVT::Other || VT == MVT::Glue) - continue; - Match = false; - if (User->isMachineOpcode()) { - const MCInstrDesc &II = TII->get(User->getMachineOpcode()); - const TargetRegisterClass *RC = nullptr; - if (i+II.getNumDefs() < II.getNumOperands()) { - RC = TRI->getAllocatableClass( - TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); - } - if (!UseRC) - UseRC = RC; - else if (RC) { - const TargetRegisterClass *ComRC = + for (SDNode *User : Node->uses()) { + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (DestReg.isVirtual()) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getSimpleValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Glue) + continue; + Match = false; + if (User->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = nullptr; + if (i + II.getNumDefs() < II.getNumOperands()) { + RC = TRI->getAllocatableClass( + TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF)); + } + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = TRI->getCommonSubClass(UseRC, RC); - // If multiple uses expect disjoint register classes, we emit - // copies in AddRegisterOperand. - if (ComRC) - UseRC = ComRC; - } + // If multiple uses expect disjoint register classes, we emit + // copies in AddRegisterOperand. + if (ComRC) + UseRC = ComRC; } } } - MatchReg &= Match; - if (VRBase) - break; } + MatchReg &= Match; + if (VRBase) + break; + } const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); @@ -219,7 +218,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, RC = VTRC; } - if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) { + if (!II.operands().empty() && II.operands()[i].isOptionalDef()) { // Optional def must be a physical register. 
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); assert(VRBase.isPhysical()); @@ -231,8 +230,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { - unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (Reg.isVirtual()) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -305,7 +304,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && - MCID.OpInfo[IIOpNum].isOptionalDef(); + MCID.operands()[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it, but first attempt to @@ -395,7 +394,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + if (OpRC && IIRC && OpRC != IIRC && VReg.isVirtual()) { Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, Register Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); - if (R && Register::isPhysicalRegister(R->getReg())) { + if (R && R->getReg().isPhysical()) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -650,7 +649,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. - if (!R || !Register::isPhysicalRegister(R->getReg())) { + if (!R || !R->getReg().isPhysical()) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,43 +677,54 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { - MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); - assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + assert(cast<DILocalVariable>(SD->getVariable()) + ->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); SD->setIsEmitted(); - ArrayRef<SDDbgOperand> LocationOps = SD->getLocationOps(); - assert(!LocationOps.empty() && "dbg_value with no location operands?"); + assert(!SD->getLocationOps().empty() && + "dbg_value with no location operands?"); if (SD->isInvalidated()) return EmitDbgNoLocation(SD); - // Emit variadic dbg_value nodes as DBG_VALUE_LIST. - if (SD->isVariadic()) { - // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)* - const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST); - // Build the DBG_VALUE_LIST instruction base. 
- auto MIB = BuildMI(*MF, DL, DbgValDesc); - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap); - return &*MIB; - } - // Attempt to produce a DBG_INSTR_REF if we've been asked to. - // We currently exclude the possibility of instruction references for - // variadic nodes; if at some point we enable them, this should be moved - // above the variadic block. if (EmitDebugInstrRefs) if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap)) return InstrRef; + // Emit variadic dbg_value nodes as DBG_VALUE_LIST if they have not been + // emitted as instruction references. + if (SD->isVariadic()) + return EmitDbgValueList(SD, VRBaseMap); + + // Emit single-location dbg_value nodes as DBG_VALUE if they have not been + // emitted as instruction references. return EmitDbgValueFromSingleOp(SD, VRBaseMap); } +MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { + const Value *V = Op.getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + return MachineOperand::CreateCImm(CI); + return MachineOperand::CreateImm(CI->getSExtValue()); + } + if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) + return MachineOperand::CreateFPImm(CF); + // Note: This assumes that all nullptr constants are zero-valued. + if (isa<ConstantPointerNull>(V)) + return MachineOperand::CreateImm(0); + // Undef or unhandled value type, so return an undef operand. + return MachineOperand::CreateReg( + /* Reg */ 0U, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true); +} + void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef<SDDbgOperand> LocationOps, @@ -740,24 +750,9 @@ void InstrEmitter::AddDbgValueLocationOps( AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } break; - case SDDbgOperand::CONST: { - const Value *V = Op.getConst(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (CI->getBitWidth() > 64) - MIB.addCImm(CI); - else - MIB.addImm(CI->getSExtValue()); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - MIB.addFPImm(CF); - } else if (isa<ConstantPointerNull>(V)) { - // Note: This assumes that all nullptr constants are zero-valued. - MIB.addImm(0); - } else { - // Could be an Undef. In any case insert an Undef so we can see what we - // dropped. - MIB.addReg(0U); - } - } break; + case SDDbgOperand::CONST: + MIB.add(GetMOForConstDbgOp(Op)); + break; } } } @@ -765,116 +760,158 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { - assert(!SD->isVariadic()); - SDDbgOperand DbgOperand = SD->getLocationOps()[0]; MDNode *Var = SD->getVariable(); - DIExpression *Expr = (DIExpression*)SD->getExpression(); + const DIExpression *Expr = (DIExpression *)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); - // Handle variable locations that don't actually depend on the instructions - // in the program: constants and stack locations. - if (DbgOperand.getKind() == SDDbgOperand::FRAMEIX || - DbgOperand.getKind() == SDDbgOperand::CONST) + // Returns true if the given operand is not a legal debug operand for a + // DBG_INSTR_REF. 
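
GetMOForConstDbgOp above is a pure classification: wide integers stay as CImm operands, small integers and null pointers become plain immediates, FP constants become FP immediates, and everything else degrades to an undef register operand so the dropped location is still visible. A self-contained approximation using std::variant in place of MachineOperand (every type name here is a stand-in):

#include <cstdint>
#include <variant>

// Constant kinds distinguished above (the real input is an SDDbgOperand).
struct WideInt { unsigned BitWidth; /* APInt payload elided */ };
struct SmallInt { int64_t Value; };
struct FPConst { double Value; };
struct NullPtr {};
struct OtherConst {}; // undef or an unhandled constant

// Resulting operand shapes.
struct CImmOp {};             // wide integer kept as a CImm
struct ImmOp { int64_t V; };  // plain immediate
struct FPImmOp { double V; };
struct UndefRegOp {};         // $noreg, so the dropped value stays visible
using DbgMachineOp = std::variant<CImmOp, ImmOp, FPImmOp, UndefRegOp>;
using ConstDbgOp = std::variant<WideInt, SmallInt, FPConst, NullPtr, OtherConst>;

DbgMachineOp lowerConstDbgOp(const ConstDbgOp &C) {
  if (std::holds_alternative<WideInt>(C))
    return CImmOp{}; // more than 64 bits: keep the full integer
  if (const auto *SI = std::get_if<SmallInt>(&C))
    return ImmOp{SI->Value};
  if (const auto *FP = std::get_if<FPConst>(&C))
    return FPImmOp{FP->Value};
  if (std::holds_alternative<NullPtr>(C))
    return ImmOp{0}; // assumes null constants are zero-valued, as noted above
  return UndefRegOp{};
}
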
+ auto IsInvalidOp = [](SDDbgOperand DbgOp) { + return DbgOp.getKind() == SDDbgOperand::FRAMEIX; + }; + // Returns true if the given operand is not itself an instruction reference + // but is a legal debug operand for a DBG_INSTR_REF. + auto IsNonInstrRefOp = [](SDDbgOperand DbgOp) { + return DbgOp.getKind() == SDDbgOperand::CONST; + }; + + // If this variable location does not depend on any instructions or contains + // any stack locations, produce it as a standard debug value instead. + if (any_of(SD->getLocationOps(), IsInvalidOp) || + all_of(SD->getLocationOps(), IsNonInstrRefOp)) { + if (SD->isVariadic()) + return EmitDbgValueList(SD, VRBaseMap); return EmitDbgValueFromSingleOp(SD, VRBaseMap); + } // Immediately fold any indirectness from the LLVM-IR intrinsic into the // expression: - if (SD->isIndirect()) { - std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; - Expr = DIExpression::append(Expr, Elts); - } + if (SD->isIndirect()) + Expr = DIExpression::append(Expr, dwarf::DW_OP_deref); + // If this is not already a variadic expression, it must be modified to become + // one. + if (!SD->isVariadic()) + Expr = DIExpression::convertToVariadicExpression(Expr); + + SmallVector<MachineOperand> MOs; // It may not be immediately possible to identify the MachineInstr that // defines a VReg, it can depend for example on the order blocks are // emitted in. When this happens, or when further analysis is needed later, // produce an instruction like this: // - // DBG_INSTR_REF %0:gr64, 0, !123, !456 + // DBG_INSTR_REF !123, !456, %0:gr64 // // i.e., point the instruction at the vreg, and patch it up later in // MachineFunction::finalizeDebugInstrRefs. - auto EmitHalfDoneInstrRef = [&](unsigned VReg) -> MachineInstr * { - auto MIB = BuildMI(*MF, DL, RefII); - MIB.addReg(VReg); - MIB.addImm(0); - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - return MIB; + auto AddVRegOp = [&](unsigned VReg) { + MOs.push_back(MachineOperand::CreateReg( + /* Reg */ VReg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)); }; + unsigned OpCount = SD->getLocationOps().size(); + for (unsigned OpIdx = 0; OpIdx < OpCount; ++OpIdx) { + SDDbgOperand DbgOperand = SD->getLocationOps()[OpIdx]; + + // Try to find both the defined register and the instruction defining it. + MachineInstr *DefMI = nullptr; + unsigned VReg; - // Try to find both the defined register and the instruction defining it. - MachineInstr *DefMI = nullptr; - unsigned VReg; + if (DbgOperand.getKind() == SDDbgOperand::VREG) { + VReg = DbgOperand.getVReg(); - if (DbgOperand.getKind() == SDDbgOperand::VREG) { - VReg = DbgOperand.getVReg(); + // No definition means that block hasn't been emitted yet. Leave a vreg + // reference to be fixed later. + if (!MRI->hasOneDef(VReg)) { + AddVRegOp(VReg); + continue; + } - // No definition means that block hasn't been emitted yet. Leave a vreg - // reference to be fixed later. - if (!MRI->hasOneDef(VReg)) - return EmitHalfDoneInstrRef(VReg); + DefMI = &*MRI->def_instr_begin(VReg); + } else if (DbgOperand.getKind() == SDDbgOperand::SDNODE) { + // Look up the corresponding VReg for the given SDNode, if any. + SDNode *Node = DbgOperand.getSDNode(); + SDValue Op = SDValue(Node, DbgOperand.getResNo()); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); + // No VReg -> produce a DBG_VALUE $noreg instead. 
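
The fallback logic introduced just above is worth stating compactly: if any location operand is a frame index it cannot be expressed as an instruction reference, and if every operand is a constant there is nothing to reference, so both cases fall back to ordinary DBG_VALUE emission. A minimal standalone version of that decision:

#include <algorithm>
#include <vector>

enum class DbgOpKind { SDNode, VReg, Const, FrameIndex };
enum class EmitAs { InstrRef, PlainDbgValue };

// Frame indices are not legal DBG_INSTR_REF operands, and an all-constant
// location has no defining instruction to refer to, so both fall back.
EmitAs chooseDbgEmission(const std::vector<DbgOpKind> &Ops) {
  bool AnyFrameIx = std::any_of(Ops.begin(), Ops.end(), [](DbgOpKind K) {
    return K == DbgOpKind::FrameIndex;
  });
  bool AllConst = std::all_of(Ops.begin(), Ops.end(), [](DbgOpKind K) {
    return K == DbgOpKind::Const;
  });
  return (AnyFrameIx || AllConst) ? EmitAs::PlainDbgValue : EmitAs::InstrRef;
}
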
+ if (I == VRBaseMap.end()) + break; - DefMI = &*MRI->def_instr_begin(VReg); - } else { - assert(DbgOperand.getKind() == SDDbgOperand::SDNODE); - // Look up the corresponding VReg for the given SDNode, if any. - SDNode *Node = DbgOperand.getSDNode(); - SDValue Op = SDValue(Node, DbgOperand.getResNo()); - DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); - // No VReg -> produce a DBG_VALUE $noreg instead. - if (I==VRBaseMap.end()) - return EmitDbgNoLocation(SD); - - // Try to pick out a defining instruction at this point. - VReg = getVR(Op, VRBaseMap); - - // Again, if there's no instruction defining the VReg right now, fix it up - // later. - if (!MRI->hasOneDef(VReg)) - return EmitHalfDoneInstrRef(VReg); - - DefMI = &*MRI->def_instr_begin(VReg); - } + // Try to pick out a defining instruction at this point. + VReg = getVR(Op, VRBaseMap); - // Avoid copy like instructions: they don't define values, only move them. - // Leave a virtual-register reference until it can be fixed up later, to find - // the underlying value definition. - if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) - return EmitHalfDoneInstrRef(VReg); + // Again, if there's no instruction defining the VReg right now, fix it up + // later. + if (!MRI->hasOneDef(VReg)) { + AddVRegOp(VReg); + continue; + } - auto MIB = BuildMI(*MF, DL, RefII); + DefMI = &*MRI->def_instr_begin(VReg); + } else { + assert(DbgOperand.getKind() == SDDbgOperand::CONST); + MOs.push_back(GetMOForConstDbgOp(DbgOperand)); + continue; + } - // Find the operand number which defines the specified VReg. - unsigned OperandIdx = 0; - for (const auto &MO : DefMI->operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == VReg) - break; - ++OperandIdx; + // Avoid copy like instructions: they don't define values, only move them. + // Leave a virtual-register reference until it can be fixed up later, to + // find the underlying value definition. + if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) { + AddVRegOp(VReg); + continue; + } + + // Find the operand number which defines the specified VReg. + unsigned OperandIdx = 0; + for (const auto &MO : DefMI->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == VReg) + break; + ++OperandIdx; + } + assert(OperandIdx < DefMI->getNumOperands()); + + // Make the DBG_INSTR_REF refer to that instruction, and that operand. + unsigned InstrNum = DefMI->getDebugInstrNum(); + MOs.push_back(MachineOperand::CreateDbgInstrRef(InstrNum, OperandIdx)); } - assert(OperandIdx < DefMI->getNumOperands()); - // Make the DBG_INSTR_REF refer to that instruction, and that operand. - unsigned InstrNum = DefMI->getDebugInstrNum(); - MIB.addImm(InstrNum); - MIB.addImm(OperandIdx); - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - return &*MIB; + // If we haven't created a valid MachineOperand for every DbgOp, abort and + // produce an undef DBG_VALUE. + if (MOs.size() != OpCount) + return EmitDbgNoLocation(SD); + + return BuildMI(*MF, DL, RefII, false, MOs, Var, Expr); } MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { // An invalidated SDNode must generate an undef DBG_VALUE: although the // original value is no longer computed, earlier DBG_VALUEs live ranges // must not leak into later code. 
+ DIVariable *Var = SD->getVariable(); + const DIExpression *Expr = + DIExpression::convertToUndefExpression(SD->getExpression()); + DebugLoc DL = SD->getDebugLoc(); + const MCInstrDesc &Desc = TII->get(TargetOpcode::DBG_VALUE); + return BuildMI(*MF, DL, Desc, false, 0U, Var, Expr); +} + +MachineInstr * +InstrEmitter::EmitDbgValueList(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); - auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); - MIB.addReg(0U); - MIB.addReg(0U); + // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)* + const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST); + // Build the DBG_VALUE_LIST instruction base. + auto MIB = BuildMI(*MF, DL, DbgValDesc); MIB.addMetadata(Var); MIB.addMetadata(Expr); + AddDbgValueLocationOps(MIB, DbgValDesc, SD->getLocationOps(), VRBaseMap); return &*MIB; } @@ -984,8 +1021,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && II.isVariadic() && II.variadicOpsAreDefs(); - bool HasPhysRegOuts = NumResults > NumDefs && - II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs; + bool HasPhysRegOuts = NumResults > NumDefs && !II.implicit_defs().empty() && + !HasVRegVariadicDefs; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -993,8 +1030,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && - NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + - NumImpUses && + NumMIOperands <= + II.getNumOperands() + II.implicit_defs().size() + NumImpUses && "#operands for dag node doesn't match .td file!"); #endif @@ -1063,6 +1100,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // part of the function. MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands()); + // Set the CFI type. + MIB->setCFIType(*MF, Node->getCFIType()); + // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. @@ -1088,12 +1128,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - Register Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.implicit_defs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. UsedRegs.push_back(Reg); - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap); } } @@ -1109,8 +1149,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } // Collect declared implicit uses. const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); + append_range(UsedRegs, MCID.implicit_uses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. 
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) @@ -1123,7 +1162,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } // Finally mark unused registers as dead. - if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef()) + if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef()) MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // STATEPOINT is too 'dynamic' to have meaningful machine description. @@ -1159,14 +1198,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, #endif llvm_unreachable("This target-independent node should have been selected!"); case ISD::EntryToken: - llvm_unreachable("EntryToken should have been excluded from the schedule!"); case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && + if (DestReg.isVirtual() && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1189,7 +1227,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } case ISD::CopyFromReg: { unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); - EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap); break; } case ISD::EH_LABEL: @@ -1273,28 +1311,25 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: for (unsigned j = 0; j != NumVals; ++j, ++i) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MIB.addReg(Reg, - RegState::Define | - getImplRegState(Register::isPhysicalRegister(Reg))); + MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical())); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, - RegState::Define | RegState::EarlyClobber | - getImplRegState(Register::isPhysicalRegister(Reg))); + Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(Reg.isPhysical())); ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. case InlineAsm::Kind_Imm: // Immediate. - case InlineAsm::Kind_Mem: // Addressing mode. + case InlineAsm::Kind_Mem: // Non-function addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) @@ -1312,6 +1347,21 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } break; + case InlineAsm::Kind_Func: // Function addressing mode. + for (unsigned j = 0; j != NumVals; ++j, ++i) { + SDValue Op = Node->getOperand(i); + AddOperand(MIB, Op, 0, nullptr, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + + // Adjust Target Flags for function reference. 
+ if (auto *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + unsigned NewFlags = + MF->getSubtarget().classifyGlobalFunctionReference( + TGA->getGlobal()); + unsigned LastIdx = MIB.getInstr()->getNumOperands() - 1; + MIB.getInstr()->getOperand(LastIdx).setTargetFlags(NewFlags); + } + } } } @@ -1344,12 +1394,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos, - bool UseInstrRefDebugInfo) + MachineBasicBlock::iterator insertpos) : MF(mbb->getParent()), MRI(&MF->getRegInfo()), TII(MF->getSubtarget().getInstrInfo()), TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) { - EmitDebugInstrRefs = UseInstrRefDebugInfo; + EmitDebugInstrRefs = mbb->getParent()->useDebugInstrRef(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index ced8f064b9be..959bce31c8b2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -44,10 +44,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. - void EmitCopyFromReg(SDNode *Node, unsigned ResNo, - bool IsClone, bool IsCloned, - Register SrcReg, - DenseMap<SDValue, Register> &VRBaseMap); + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, @@ -128,6 +126,10 @@ public: /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); + /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. + MachineInstr *EmitDbgValueList(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap); + /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap); @@ -154,8 +156,7 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos, - bool UseInstrRefDebugInfo); + MachineBasicBlock::iterator insertpos); private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 56d35dfe8701..c3106216a060 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -142,10 +142,12 @@ private: RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128, - RTLIB::Libcall Call_IEXT); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128); void ExpandArgFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, @@ -308,7 +310,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { // We don't want to shrink SNaNs. Converting the SNaN back to its real type // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ). if (!APF.isSignaling()) { - while (SVT != MVT::f32 && SVT != MVT::f16) { + while (SVT != MVT::f32 && SVT != MVT::f16 && SVT != MVT::bf16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, APF) && // Only do this if the target has a native EXTLOAD instruction from @@ -550,16 +552,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedValue()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); - } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) { + } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedValue())) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned StWidthBits = StWidth.getFixedSize(); + unsigned StWidthBits = StWidth.getFixedValue(); unsigned LogStWidth = Log2_32(StWidthBits); assert(LogStWidth < 32); unsigned RoundWidth = 1 << LogStWidth; @@ -767,10 +769,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Value = Result; Chain = Ch; - } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) { + } else if (!isPowerOf2_64(SrcWidth.getKnownMinValue())) { // If not loading a power-of-2 number of bits, expand as two loads. 
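
// ---------------------------------------------------------------------------
// Aside (not part of the diff): the non-power-of-2 load expansion performed
// below splits the access into a power-of-2 "round" part plus an "extra"
// part. A self-contained little-endian model of a zero-extending 24-bit load
// (RoundWidth = 16, ExtraWidth = 8); names are invented for illustration.
#include <cstdint>
#include <cstring>

static uint32_t toyZextLoadI24(const unsigned char *Ptr) {
  uint16_t RoundPart;                       // 16-bit load of the low bits.
  std::memcpy(&RoundPart, Ptr, sizeof(RoundPart));
  uint8_t ExtraPart = Ptr[2];               // 8-bit load of the remaining bits.
  // On little-endian the extra part is shifted up by RoundWidth and OR'd in.
  return static_cast<uint32_t>(RoundPart) |
         (static_cast<uint32_t>(ExtraPart) << 16);
}
// ---------------------------------------------------------------------------
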
assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned SrcWidthBits = SrcWidth.getFixedSize(); + unsigned SrcWidthBits = SrcWidth.getFixedValue(); unsigned LogSrcWidth = Log2_32(SrcWidthBits); assert(LogSrcWidth < 32); unsigned RoundWidth = 1 << LogSrcWidth; @@ -850,7 +852,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::Legal: Value = SDValue(Node, 0); Chain = SDValue(Node, 1); @@ -1035,12 +1037,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: case ISD::SETCC: + case ISD::SETCCCARRY: case ISD::VP_SETCC: case ISD::BR_CC: { unsigned Opc = Node->getOpcode(); unsigned CCOperand = Opc == ISD::SELECT_CC ? 4 : Opc == ISD::STRICT_FSETCC ? 3 : Opc == ISD::STRICT_FSETCCS ? 3 + : Opc == ISD::SETCCCARRY ? 3 : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2 : 1; unsigned CompareOperand = Opc == ISD::BR_CC ? 2 @@ -1074,7 +1078,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SimpleFinishLegalizing = false; break; case ISD::EXTRACT_ELEMENT: - case ISD::FLT_ROUNDS_: + case ISD::GET_ROUNDING: case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: @@ -1317,11 +1321,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::Expand: if (ExpandNode(Node)) return; - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::LibCall: ConvertNodeToLibcall(Node); return; @@ -1717,8 +1721,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, DAG.getConstant(-Alignment.value(), dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); @@ -2111,17 +2114,15 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, ExpandFPLibCall(Node, LC, Results); } -SDValue SelectionDAGLegalize::ExpandIntLibCall( - SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) { +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - - default: - LC = Call_IEXT; - break; - + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2156,11 +2157,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - - default: - LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT; - break; - + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? 
RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2744,7 +2741,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { FA, Offset)); break; } - case ISD::FLT_ROUNDS_: + case ISD::GET_ROUNDING: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); Results.push_back(Node->getOperand(0)); break; @@ -2911,13 +2908,44 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::BF16_TO_FP: { // Always expand bf16 to f32 casts, they lower to ext + shift. - SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0)); - Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op); + // + // Note that the operand of this code can be bf16 or an integer type in case + // bf16 is not supported on the target and was softened. + SDValue Op = Node->getOperand(0); + if (Op.getValueType() == MVT::bf16) { + Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i16, Op)); + } else { + Op = DAG.getAnyExtOrTrunc(Op, dl, MVT::i32); + } Op = DAG.getNode( ISD::SHL, dl, MVT::i32, Op, DAG.getConstant(16, dl, TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op); + // Add fp_extend in case the output is bigger than f32. + if (Node->getValueType(0) != MVT::f32) + Op = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Op); + Results.push_back(Op); + break; + } + case ISD::FP_TO_BF16: { + SDValue Op = Node->getOperand(0); + if (Op.getValueType() != MVT::f32) + Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); + Op = DAG.getNode( + ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op), + DAG.getConstant(16, dl, + TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); + // The result of this node can be bf16 or an integer type in case bf16 is + // not supported on the target and was softened to i16 for storage. + if (Node->getValueType(0) == MVT::bf16) { + Op = DAG.getNode(ISD::BITCAST, dl, MVT::bf16, + DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Op)); + } else { + Op = DAG.getAnyExtOrTrunc(Op, dl, Node->getValueType(0)); + } Results.push_back(Op); break; } @@ -2961,7 +2989,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp2); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SINT_TO_FP: case ISD::STRICT_SINT_TO_FP: if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) { @@ -3112,7 +3140,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::EXTRACT_ELEMENT: { EVT OpTy = Node->getOperand(0).getValueType(); - if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + if (Node->getConstantOperandVal(1)) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), DAG.getConstant(OpTy.getSizeInBits() / 2, dl, @@ -3251,8 +3279,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { // Under fastmath, we can expand this node into a fround followed by // a float-half conversion. 
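
// ---------------------------------------------------------------------------
// Aside (not part of the diff): the BF16_TO_FP / FP_TO_BF16 expansions added
// above come down to plain bit manipulation on the f32 representation, since
// bf16 is the top 16 bits of an IEEE f32. A self-contained sketch (helper
// names invented); note that the FP_TO_BF16 path, like the SRL-by-16 above,
// truncates the significand rather than rounding to nearest.
#include <cstdint>
#include <cstring>

static float toyBF16BitsToF32(uint16_t Bits) {
  // BF16_TO_FP: any-extend to i32, shift left by 16, bitcast to f32.
  uint32_t Wide = static_cast<uint32_t>(Bits) << 16;
  float F;
  std::memcpy(&F, &Wide, sizeof(F));
  return F;
}

static uint16_t toyF32ToBF16Bits(float F) {
  // FP_TO_BF16: bitcast to i32, shift right by 16, truncate to i16.
  uint32_t Wide;
  std::memcpy(&Wide, &F, sizeof(Wide));
  return static_cast<uint16_t>(Wide >> 16);
}
// ---------------------------------------------------------------------------
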
- SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, - DAG.getIntPtrConstant(0, dl)); + SDValue FloatVal = + DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); } @@ -4379,24 +4408,28 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: - Results.push_back(ExpandIntLibCall( - Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT)); + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); break; case ISD::UREM: - Results.push_back(ExpandIntLibCall( - Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT)); + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); break; case ISD::SDIV: - Results.push_back(ExpandIntLibCall( - Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT)); + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); break; case ISD::UDIV: - Results.push_back(ExpandIntLibCall( - Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT)); + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); break; case ISD::SDIVREM: case ISD::UDIVREM: @@ -4404,9 +4437,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandDivRemLibCall(Node, Results); break; case ISD::MUL: - Results.push_back(ExpandIntLibCall( - Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT)); + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); break; case ISD::CTLZ_ZERO_UNDEF: switch (Node->getSimpleValueType(0).SimpleTy) { @@ -4696,7 +4730,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1); else Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1, - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); Results.push_back(Tmp1); break; @@ -4756,8 +4790,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Node->getFlags()); - Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; case ISD::STRICT_FADD: case ISD::STRICT_FSUB: @@ -4787,7 +4822,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back( DAG.getNode(ISD::FP_ROUND, dl, OVT, DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), - DAG.getIntPtrConstant(0, dl))); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; case ISD::STRICT_FMA: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, @@ -4817,8 +4852,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // (fp_round (fpext a)) // 
which is a no-op. Mark it as a TRUNCating FP_ROUND. const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); - Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, + DAG.getIntPtrConstant(isTrunc, dl, /*isTarget=*/true))); break; } case ISD::STRICT_FPOWI: @@ -4850,8 +4886,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FEXP2: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp2, DAG.getIntPtrConstant(0, dl))); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b2df67f45c72..f1e80ce7e037 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1071,8 +1071,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. - Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), - Val, DAG.getIntPtrConstant(0, dl))); + Val = BitConvertToInteger( + DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), Val, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); else Val = GetSoftenedFloat(Val); @@ -2532,7 +2533,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { // Round the value to the desired precision (that of the source type). return DAG.getNode( ISD::FP_EXTEND, DL, NVT, - DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL))); + DAG.getNode(ISD::FP_ROUND, DL, VT, NV, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true))); } SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { @@ -2746,39 +2748,47 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); SDLoc dl(N); // Promote to the larger FP type. - Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); - Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2); + auto PromotionOpcode = GetPromotionOpcode(OVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); + Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2); // Convert back to FP16 as an integer. 
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); SDValue Op1 = N->getOperand(1); SDLoc dl(N); - Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + // Promote to the larger FP type. + Op0 = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op0); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); // Convert back to FP16 as an integer. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT SVT = N->getOperand(0).getValueType(); + if (N->isStrictFPOpcode()) { + assert(RVT == MVT::f16); SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other}, {N->getOperand(0), N->getOperand(1)}); @@ -2786,7 +2796,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { return Res; } - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0)); + return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), MVT::i16, + N->getOperand(0)); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) { @@ -2821,13 +2832,14 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); // Round the value to the softened type. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) { @@ -2835,33 +2847,36 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) { } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); SDLoc dl(N); // Promote to the larger FP type. - Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Convert back to FP16 as an integer. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); SDLoc dl(N); // Promote to the larger FP type. 
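
// ---------------------------------------------------------------------------
// Aside (not part of the diff): with the GetPromotionOpcode changes above,
// this promote / operate / convert-back pattern also covers bf16, whose
// conversions are simple shifts. A self-contained model of a soft-promoted
// bf16 FADD (names invented): operands live as i16 bit patterns, the
// arithmetic happens in f32, and the result is truncated back to i16 storage.
#include <cstdint>
#include <cstring>

static uint16_t toySoftPromotedFAddBF16(uint16_t A, uint16_t B) {
  auto ToF32 = [](uint16_t Bits) {         // The promote step (BF16_TO_FP).
    uint32_t W = static_cast<uint32_t>(Bits) << 16;
    float F;
    std::memcpy(&F, &W, sizeof(F));
    return F;
  };
  float Res = ToF32(A) + ToF32(B);         // The operation in the larger type.
  uint32_t W;                              // The demote step (FP_TO_BF16).
  std::memcpy(&W, &Res, sizeof(W));
  return static_cast<uint16_t>(W >> 16);
}
// ---------------------------------------------------------------------------
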
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + auto PromotionOpcode = GetPromotionOpcode(OVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); // Convert back to FP16 as an integer. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) { @@ -2945,22 +2960,27 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Only Operand 1 must need promotion here"); SDValue Op1 = N->getOperand(1); + EVT RVT = Op1.getValueType(); SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType()); Op1 = GetSoftPromotedHalf(Op1); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + Op1 = DAG.getNode(GetPromotionOpcode(RVT, NVT), dl, NVT, Op1); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0), Op1); } SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { + EVT RVT = N->getValueType(0); bool IsStrict = N->isStrictFPOpcode(); - SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0)); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); + Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0)); if (IsStrict) { + assert(SVT == MVT::f16); SDValue Res = DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N), {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op}); @@ -2969,31 +2989,35 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { return SDValue(); } - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op); + return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op); } SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { + EVT RVT = N->getValueType(0); SDValue Op = N->getOperand(0); + EVT SVT = Op.getValueType(); SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); - SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) { + EVT RVT = N->getValueType(0); SDValue Op = N->getOperand(0); + EVT SVT = Op.getValueType(); SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); - SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res, N->getOperand(1)); @@ -3006,14 +3030,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N, SDValue Op1 = N->getOperand(1); SDLoc dl(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); + EVT SVT = Op0.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), SVT); Op0 = GetSoftPromotedHalf(Op0); Op1 = GetSoftPromotedHalf(Op1); // Promote to the larger FP type. 
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + auto PromotionOpcode = GetPromotionOpcode(SVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1, N->getOperand(2), N->getOperand(3), N->getOperand(4)); @@ -3025,14 +3051,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) { ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDLoc dl(N); + EVT SVT = Op0.getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); Op0 = GetSoftPromotedHalf(Op0); Op1 = GetSoftPromotedHalf(Op1); // Promote to the larger FP type. - Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + auto PromotionOpcode = GetPromotionOpcode(SVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e2173879c218..c9ce9071a25d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -137,8 +137,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; - case ISD::VP_FPTOSI: - case ISD::VP_FPTOUI: + case ISD::VP_FP_TO_SINT: + case ISD::VP_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: @@ -148,9 +148,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT_SAT: Res = PromoteIntRes_FP_TO_XINT_SAT(N); break; - case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; + case ISD::FP_TO_BF16: + case ISD::FP_TO_FP16: + Res = PromoteIntRes_FP_TO_FP16_BF16(N); + break; - case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break; + case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break; case ISD::AND: case ISD::OR: @@ -165,11 +168,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VP_SUB: case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::VP_SMIN: + case ISD::VP_SMAX: case ISD::SDIV: case ISD::SREM: case ISD::VP_SDIV: case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::VP_UMIN: + case ISD::VP_UMAX: case ISD::UDIV: case ISD::UREM: case ISD::VP_UDIV: @@ -673,10 +680,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) NewOpc = ISD::STRICT_FP_TO_SINT; - if (N->getOpcode() == ISD::VP_FPTOUI && - !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) && - TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT)) - NewOpc = ISD::VP_FPTOSI; + if (N->getOpcode() == ISD::VP_FP_TO_UINT && + !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT)) + NewOpc = ISD::VP_FP_TO_SINT; SDValue Res; if (N->isStrictFPOpcode()) { @@ -685,7 +692,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) { + } else if (NewOpc == ISD::VP_FP_TO_SINT || NewOpc == ISD::VP_FP_TO_UINT) { Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1), N->getOperand(2)}); } else { @@ -701,7 +708,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || N->getOpcode() == ISD::STRICT_FP_TO_UINT || - N->getOpcode() == ISD::VP_FPTOUI) + N->getOpcode() == ISD::VP_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, @@ -716,14 +723,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) { N->getOperand(1)); } -SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } -SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -836,7 +843,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT), - makeArrayRef(Ops, NumOps)); + ArrayRef(Ops, NumOps)); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -1555,7 +1562,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) { EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + const APInt &MulImm = N->getConstantOperandAPInt(0); return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits())); } @@ -1648,7 +1655,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VP_SETCC: case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; - case ISD::VP_SITOFP: + case ISD::VP_SINT_TO_FP: case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), @@ -1663,8 +1670,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::BF16_TO_FP: case ISD::FP16_TO_FP: - case ISD::VP_UITOFP: + case ISD::VP_UINT_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -1998,7 +2006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { - if (N->getOpcode() == ISD::VP_SITOFP) + if (N->getOpcode() == ISD::VP_SINT_TO_FP) return SDValue(DAG.UpdateNodeOperands(N, SExtPromotedInteger(N->getOperand(0)), N->getOperand(1), N->getOperand(2)), @@ -2127,7 +2135,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { - if (N->getOpcode() == ISD::VP_UITOFP) + if 
(N->getOpcode() == ISD::VP_UINT_TO_FP) return SDValue(DAG.UpdateNodeOperands(N, ZExtPromotedInteger(N->getOperand(0)), N->getOperand(1), N->getOperand(2)), @@ -2420,17 +2428,21 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; - case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break; case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break; + case ISD::STRICT_LROUND: + case ISD::STRICT_LRINT: + case ISD::LROUND: + case ISD::LRINT: case ISD::STRICT_LLROUND: case ISD::STRICT_LLRINT: case ISD::LLROUND: - case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break; + case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -2866,15 +2878,29 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, ISD::CondCode CondC; std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - GetExpandedInteger(N->getOperand(0), LHSL, LHSH); - GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); // Value types EVT NVT = LHSL.getValueType(); EVT CCT = getSetCCResultType(NVT); + // If the upper halves are all sign bits, then we can perform the MINMAX on + // the lower half and sign-extend the result to the upper half. + unsigned NumHalfBits = NVT.getScalarSizeInBits(); + if (DAG.ComputeNumSignBits(LHS) > NumHalfBits && + DAG.ComputeNumSignBits(RHS) > NumHalfBits) { + Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL); + Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo, + DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL)); + return; + } + // Hi part is always the same op Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); @@ -2913,13 +2939,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); Hi = DAG.computeKnownBits(HiOps[2]).isZero() - ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2)) + ? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2)) : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); } else { Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); Hi = DAG.computeKnownBits(HiOps[2]).isZero() - ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2)) + ? 
DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2)) : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); } return; @@ -2962,18 +2988,18 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); - Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2)); } else { RevOpc = ISD::ADD; Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); - Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); } SDValue OVF = Lo.getValue(1); switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF); - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLoweringBase::ZeroOrOneBooleanContent: OVF = DAG.getZExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); @@ -2987,27 +3013,21 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); - Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); - SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], - ISD::SETULT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2)); + SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); - if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { - SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); - return; - } + SDValue Carry; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Carry = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); - SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], - ISD::SETULT); - SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, - DAG.getConstant(1, dl, NVT), Carry1); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); - Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); @@ -3280,6 +3300,14 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedInteger(N0, Lo, Hi); EVT NVT = Lo.getValueType(); + // If the upper half is all sign bits, then we can perform the ABS on the + // lower half and zero-extend. + if (DAG.ComputeNumSignBits(N0) > NVT.getScalarSizeInBits()) { + Lo = DAG.getNode(ISD::ABS, dl, NVT, Lo); + Hi = DAG.getConstant(0, dl, NVT); + return; + } + // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we // use in LegalizeDAG. 
The SUB part of the expansion is based on // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that @@ -3364,15 +3392,15 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, Hi = DAG.getConstant(0, dl, NVT); } -void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, +void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); + Lo = DAG.getNode(ISD::GET_ROUNDING, dl, {NVT, MVT::Other}, N->getOperand(0)); SDValue Chain = Lo.getValue(1); - // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS + // The high part is the sign of Lo, as -1 is a valid value for GET_ROUNDING Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl)); @@ -3450,17 +3478,57 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SplitInteger(Res, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); +void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat && "Input type needs to be promoted!"); EVT VT = Op.getValueType(); + if (VT == MVT::f16) { + VT = MVT::f32; + // Extend to f32. + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op}); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op); + } + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (N->getOpcode() == ISD::LLROUND || + if (N->getOpcode() == ISD::LROUND || + N->getOpcode() == ISD::STRICT_LROUND) { + if (VT == MVT::f32) + LC = RTLIB::LROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!"); + } else if (N->getOpcode() == ISD::LRINT || + N->getOpcode() == ISD::STRICT_LRINT) { + if (VT == MVT::f32) + LC = RTLIB::LRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!"); + } else if (N->getOpcode() == ISD::LLROUND || N->getOpcode() == ISD::STRICT_LLROUND) { if (VT == MVT::f32) LC = RTLIB::LLROUND_F32; @@ -3489,9 +3557,7 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, } else llvm_unreachable("Unexpected opcode!"); - SDLoc dl(N); EVT RetVT = N->getValueType(0); - SDValue Chain = N->isStrictFPOpcode() ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); @@ -4046,70 +4112,6 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, ReplaceValueWith(SDValue(Node, 1), Ovf); } -// Emit a call to __udivei4 and friends which require -// the arguments be based on the stack -// and extra argument that contains the number of bits of the operands. -// Returns the result of the call operation. -static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI, - const RTLIB::Libcall &LC, - SelectionDAG &DAG, SDNode *N, - const SDLoc &DL, const EVT &VT) { - - SDValue InChain = DAG.getEntryNode(); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - - // The signature of __udivei4 is - // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b, - // unsigned int bits) - EVT ArgVT = N->op_begin()->getValueType(); - assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 && - "Unexpected argument type for lowering"); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - - SDValue Output = DAG.CreateStackTemporary(ArgVT); - Entry.Node = Output; - Entry.Ty = ArgTy->getPointerTo(); - Entry.IsSExt = false; - Entry.IsZExt = false; - Args.push_back(Entry); - - for (const llvm::SDUse &Op : N->ops()) { - SDValue StackPtr = DAG.CreateStackTemporary(ArgVT); - InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo()); - Entry.Node = StackPtr; - Entry.Ty = ArgTy->getPointerTo(); - Entry.IsSExt = false; - Entry.IsZExt = false; - Args.push_back(Entry); - } - - int Bits = N->getOperand(0) - .getValueType() - .getTypeForEVT(*DAG.getContext()) - ->getIntegerBitWidth(); - Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout())); - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.IsSExt = false; - Entry.IsZExt = true; - Args.push_back(Entry); - - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(DL) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)) - .setDiscardResult(); - - SDValue Chain = TLI.LowerCallTo(CLI).second; - - return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo()); -} - void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -4131,14 +4133,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I64; else if (VT == MVT::i128) LC = RTLIB::SDIV_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4146,6 +4140,111 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + SDValue Shiftee = N->getOperand(0); + EVT VT = Shiftee.getValueType(); + SDValue ShAmt = N->getOperand(1); + EVT ShAmtVT = ShAmt.getValueType(); + + // This legalization is optimal when the shift is by a multiple of byte width, + // %x * 8 <-> %x << 3 so 3 low bits should be be known zero. 
+ bool ShiftByByteMultiple = + DAG.computeKnownBits(ShAmt).countMinTrailingZeros() >= 3; + + // If we can't do it as one step, we'll have two uses of shift amount, + // and thus must freeze it. + if (!ShiftByByteMultiple) + ShAmt = DAG.getFreeze(ShAmt); + + unsigned VTBitWidth = VT.getScalarSizeInBits(); + assert(VTBitWidth % 8 == 0 && "Shifting a not byte multiple value?"); + unsigned VTByteWidth = VTBitWidth / 8; + assert(isPowerOf2_32(VTByteWidth) && + "Shiftee type size is not a power of two!"); + unsigned StackSlotByteWidth = 2 * VTByteWidth; + unsigned StackSlotBitWidth = 8 * StackSlotByteWidth; + EVT StackSlotVT = EVT::getIntegerVT(*DAG.getContext(), StackSlotBitWidth); + + // Get a temporary stack slot 2x the width of our VT. + // FIXME: reuse stack slots? + // FIXME: should we be more picky about alignment? + Align StackSlotAlignment(1); + SDValue StackPtr = DAG.CreateStackTemporary( + TypeSize::getFixed(StackSlotByteWidth), StackSlotAlignment); + EVT PtrTy = StackPtr.getValueType(); + SDValue Ch = DAG.getEntryNode(); + + MachinePointerInfo StackPtrInfo = MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex()); + + // Extend the value, that is being shifted, to the entire stack slot's width. + SDValue Init; + if (N->getOpcode() != ISD::SHL) { + unsigned WideningOpc = + N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Init = DAG.getNode(WideningOpc, dl, StackSlotVT, Shiftee); + } else { + // For left-shifts, pad the Shiftee's LSB with zeros to twice it's width. + SDValue AllZeros = DAG.getConstant(0, dl, VT); + Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee); + } + // And spill it into the stack slot. + Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo, StackSlotAlignment); + + // Now, compute the full-byte offset into stack slot from where we can load. + // We have shift amount, which is in bits, but in multiples of byte. + // So just divide by CHAR_BIT. + SDNodeFlags Flags; + if (ShiftByByteMultiple) + Flags.setExact(true); + SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt, + DAG.getConstant(3, dl, ShAmtVT), Flags); + // And clamp it, because OOB load is an immediate UB, + // while shift overflow would have *just* been poison. + ByteOffset = DAG.getNode(ISD::AND, dl, ShAmtVT, ByteOffset, + DAG.getConstant(VTByteWidth - 1, dl, ShAmtVT)); + // We have exactly two strategies on indexing into stack slot here: + // 1. upwards starting from the beginning of the slot + // 2. downwards starting from the middle of the slot + // On little-endian machine, we pick 1. for right shifts and 2. for left-shift + // and vice versa on big-endian machine. + bool WillIndexUpwards = N->getOpcode() != ISD::SHL; + if (DAG.getDataLayout().isBigEndian()) + WillIndexUpwards = !WillIndexUpwards; + + SDValue AdjStackPtr; + if (WillIndexUpwards) { + AdjStackPtr = StackPtr; + } else { + AdjStackPtr = DAG.getMemBasePlusOffset( + StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl); + ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT); + } + + // Get the pointer somewhere into the stack slot from which we need to load. + ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy); + AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl); + + // And load it! While the load is not legal, legalizing it is obvious. 
+ SDValue Res = DAG.getLoad( + VT, dl, Ch, AdjStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), Align(1)); + // We've performed the shift by a CHAR_BIT * [_ShAmt / CHAR_BIT_] + + // If we may still have a less-than-CHAR_BIT to shift by, do so now. + if (!ShiftByByteMultiple) { + SDValue ShAmtRem = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(7, dl, ShAmtVT)); + Res = DAG.getNode(N->getOpcode(), dl, VT, Res, ShAmtRem); + } + + // Finally, split the computed value. + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -4181,7 +4280,24 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || Action == TargetLowering::Custom; - if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) { + unsigned ExpansionFactor = 1; + // That VT->NVT expansion is one step. But will we re-expand NVT? + for (EVT TmpVT = NVT;;) { + EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT); + if (NewTMPVT == TmpVT) + break; + TmpVT = NewTMPVT; + ++ExpansionFactor; + } + + TargetLowering::ShiftLegalizationStrategy S = + TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor); + + if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack) + return ExpandIntRes_ShiftThroughStack(N, Lo, Hi); + + if (LegalOrCustom && + S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) { // Expand the subcomponents. SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); @@ -4330,14 +4446,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I64; else if (VT == MVT::i128) LC = RTLIB::SREM_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4507,6 +4615,22 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, return; } + // Try to expand UDIV by constant. + if (isa<ConstantSDNode>(N->getOperand(1))) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Only if the new type is legal. + if (isTypeLegal(NVT)) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + SmallVector<SDValue> Result; + if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) { + Lo = Result[0]; + Hi = Result[1]; + return; + } + } + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) LC = RTLIB::UDIV_I16; @@ -4516,14 +4640,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I64; else if (VT == MVT::i128) LC = RTLIB::UDIV_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4542,6 +4658,22 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, return; } + // Try to expand UREM by constant. + if (isa<ConstantSDNode>(N->getOperand(1))) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Only if the new type is legal. 
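
// ---------------------------------------------------------------------------
// Aside (not part of the diff): a self-contained, little-endian-only model of
// what the ExpandIntRes_ShiftThroughStack expansion added above computes for a
// logical right shift of an i128: spill the value, zero-extended to twice its
// width, into a byte buffer, reload at an offset of ShAmt / 8 bytes, then
// finish with the remaining sub-byte shift. (SRA sign-extends instead, and
// SHL pads the low half and indexes downwards.) Names are invented for
// illustration.
#include <cstdint>
#include <cstring>

struct ToyU128 { uint64_t Lo, Hi; };     // The two expanded halves.

static ToyU128 toyLshrThroughStack(ToyU128 X, unsigned ShAmt) { // ShAmt < 128
  unsigned char Slot[32] = {};           // 2x the 16-byte width, zeroed.
  std::memcpy(Slot, &X.Lo, 8);           // Zero-extend: top 16 bytes stay 0.
  std::memcpy(Slot + 8, &X.Hi, 8);

  unsigned ByteOff = (ShAmt / 8) & 15;   // Clamp, like the AND in the code.
  ToyU128 R;
  std::memcpy(&R.Lo, Slot + ByteOff, 8);      // Index upwards for a right
  std::memcpy(&R.Hi, Slot + ByteOff + 8, 8);  // shift on little-endian.

  if (unsigned Rem = ShAmt % 8) {        // The final shift by ShAmt % CHAR_BIT.
    R.Lo = (R.Lo >> Rem) | (R.Hi << (64 - Rem));
    R.Hi >>= Rem;
  }
  return R;
}
// ---------------------------------------------------------------------------
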
+ if (isTypeLegal(NVT)) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + SmallVector<SDValue> Result; + if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) { + Lo = Result[0]; + Hi = Result[1]; + return; + } + } + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) LC = RTLIB::UREM_I16; @@ -4551,14 +4683,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I64; else if (VT == MVT::i128) LC = RTLIB::UREM_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -5297,7 +5421,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); } - SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -5355,7 +5478,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isScalableVector() && "Type must be promoted to a scalable vector type"); - APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + const APInt &StepVal = N->getConstantOperandAPInt(0); return DAG.getStepVector(dl, NOutVT, StepVal.sext(NOutVT.getScalarSizeInBits())); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 8fe9a83b9c3d..5e0349593139 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -722,9 +722,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && +#ifndef NDEBUG + EVT VT = Result.getValueType(); + LLVMContext &Ctx = *DAG.getContext(); + assert((VT == EVT::getIntegerVT(Ctx, 80) || + VT == TLI.getTypeToTransformTo(Ctx, Op.getValueType())) && "Invalid type for softened float"); +#endif AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; @@ -759,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // a constant i8 operand. // We don't currently support the scalarization of scalable vector types. 
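// For the UDIV/UREM-by-constant paths above: a libcall can often be avoided
// because the remainder of a two-word value decomposes over its halves. A
// worked instance of the arithmetic for an unsigned 128-bit value
// N = Hi * 2^64 + Lo and a divisor of 10, using 2^64 % 10 == 6 (this
// illustrates the underlying identity only, not the exact node sequence
// expandDIVREMByConstant emits):
#include <cstdint>
static unsigned urem10(uint64_t Hi, uint64_t Lo) {
  return (unsigned)(((Hi % 10) * 6 + Lo % 10) % 10);
}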
- assert(Result.getValueSizeInBits().getFixedSize() >= + assert(Result.getValueSizeInBits().getFixedValue() >= Op.getScalarValueSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 6696b79cf885..b97e44a01319 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -324,7 +324,7 @@ private: SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); - SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N); SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); @@ -354,7 +354,7 @@ private: SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_DIVFIX(SDNode *N); - SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); + SDValue PromoteIntRes_GET_ROUNDING(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); SDValue PromoteIntRes_VP_REDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); @@ -437,11 +437,11 @@ private: void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -457,6 +457,7 @@ private: void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ShiftThroughStack (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -862,6 +863,8 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, + SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi, bool SplitSETCC = false); @@ -891,6 +894,7 @@ private: SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo); SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned 
OpNo); @@ -947,6 +951,7 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); + SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N); @@ -958,6 +963,7 @@ private: SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); + SDValue WidenVecRes_VECTOR_REVERSE(SDNode *N); SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); @@ -984,6 +990,7 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); + SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c6885677d644..21b5255c8f72 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -197,8 +197,7 @@ void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedOp(N->getOperand(0), Lo, Hi); - SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? - Hi : Lo; + SDValue Part = N->getConstantOperandVal(1) ? Hi : Lo; assert(Part.getValueType() == N->getValueType(0) && "Type twice as big as expanded type not itself expanded!"); @@ -209,7 +208,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue OldVec = N->getOperand(0); - unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + ElementCount OldEltCount = OldVec.getValueType().getVectorElementCount(); EVT OldEltVT = OldVec.getValueType().getVectorElementType(); SDLoc dl(N); @@ -223,14 +222,13 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // the input vector. If so, extend the elements of the input vector to the // same bitwidth as the result before expanding. assert(OldEltVT.bitsLT(OldVT) && "Result type smaller then element type!"); - EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts); + EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldEltCount); OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0)); } - SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, - EVT::getVectorVT(*DAG.getContext(), - NewVT, 2*OldElts), - OldVec); + SDValue NewVec = DAG.getNode( + ISD::BITCAST, dl, + EVT::getVectorVT(*DAG.getContext(), NewVT, OldEltCount * 2), OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. 
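// A host-side model of the idea above, assuming a little-endian layout and
// using plain arrays in place of the DAG vectors (the helper name is
// illustrative only): extracting 64-bit element Idx from a v2i64 becomes two
// 32-bit extracts, at indices 2 * Idx and 2 * Idx + 1, from the bitcast v4i32.
#include <cstdint>
#include <cstring>
static void expandExtractElt(const uint64_t Vec[2], unsigned Idx,
                             uint32_t &Lo, uint32_t &Hi) {
  uint32_t Narrow[4];                        // the v4i32 view of the v2i64
  std::memcpy(Narrow, Vec, sizeof(Narrow));  // stands in for ISD::BITCAST
  Lo = Narrow[2 * Idx];                      // element at 2 * Idx
  Hi = Narrow[2 * Idx + 1];                  // element at 2 * Idx + 1
}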
SDValue Idx = N->getOperand(1); @@ -359,8 +357,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector<SDValue, 8> Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = - DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts)); + SDValue Vec = DAG.getBuildVector(NVT, dl, ArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -403,7 +400,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { SDValue Lo, Hi; GetExpandedOp(N->getOperand(0), Lo, Hi); - return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; + return N->getConstantOperandVal(1) ? Hi : Lo; } SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f5a1eae1e7fe..e245b3cb4c6d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -132,6 +132,7 @@ class VectorLegalizer { SDValue ExpandVSELECT(SDNode *Node); SDValue ExpandVP_SELECT(SDNode *Node); SDValue ExpandVP_MERGE(SDNode *Node); + SDValue ExpandVP_REM(SDNode *Node); SDValue ExpandSELECT(SDNode *Node); std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); SDValue ExpandStore(SDNode *N); @@ -492,7 +493,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (LowerOperationWrapper(Node, ResultVals)) break; LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::Expand: LLVM_DEBUG(dbgs() << "Expanding\n"); Expand(Node, ResultVals); @@ -594,7 +595,8 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); + Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); else Res = DAG.getNode(ISD::BITCAST, dl, VT, Res); @@ -728,12 +730,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node)); return; + case ISD::VP_BSWAP: + Results.push_back(TLI.expandVPBSWAP(Node, DAG)); + return; case ISD::VSELECT: Results.push_back(ExpandVSELECT(Node)); return; case ISD::VP_SELECT: Results.push_back(ExpandVP_SELECT(Node)); return; + case ISD::VP_SREM: + case ISD::VP_UREM: + if (SDValue Expanded = ExpandVP_REM(Node)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; @@ -776,12 +788,24 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::BITREVERSE: ExpandBITREVERSE(Node, Results); return; + case ISD::VP_BITREVERSE: + if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::CTPOP: if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) { Results.push_back(Expanded); return; } break; + case ISD::VP_CTPOP: + if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: if (SDValue Expanded = 
TLI.expandCTLZ(Node, DAG)) { @@ -789,6 +813,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::VP_CTLZ: + case ISD::VP_CTLZ_ZERO_UNDEF: + if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) { @@ -796,8 +827,17 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::VP_CTTZ: + case ISD::VP_CTTZ_ZERO_UNDEF: + if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::FSHL: + case ISD::VP_FSHL: case ISD::FSHR: + case ISD::VP_FSHR: if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) { Results.push_back(Expanded); return; @@ -847,6 +887,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::USHLSAT: + case ISD::SSHLSAT: + if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: // Expand the fpsosisat if it is scalable to prevent it from unrolling below. @@ -954,10 +1001,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all one or all zero. - if (VT.isFixedLengthVector()) - Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask); - else - Mask = DAG.getSplatVector(MaskTy, DL, Mask); + Mask = DAG.getSplat(MaskTy, DL, Mask); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -1300,8 +1344,7 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { return DAG.UnrollVectorOp(Node); SDValue StepVec = DAG.getStepVector(DL, EVLVecVT); - SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL) - : DAG.getSplatVector(EVLVecVT, DL, EVL); + SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL); SDValue EVLMask = DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT); @@ -1309,6 +1352,30 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2); } +SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { + // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. + EVT VT = Node->getValueType(0); + + unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV; + + if (!TLI.isOperationLegalOrCustom(DivOpc, VT) || + !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) || + !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT)) + return SDValue(); + + SDLoc DL(Node); + + SDValue Dividend = Node->getOperand(0); + SDValue Divisor = Node->getOperand(1); + SDValue Mask = Node->getOperand(2); + SDValue EVL = Node->getOperand(3); + + // X % Y -> X-X/Y*Y + SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL); + SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL); + return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL); +} + void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. 
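The ExpandVP_REM path above relies on the ordinary integer identity X % Y == X - (X / Y) * Y, applied lane by lane under the same mask and explicit vector length. A minimal scalar sketch of that identity (the helper name is illustrative, and division by zero is as undefined here as in the original node):

#include <cstdint>
static uint64_t remViaDivMulSub(uint64_t X, uint64_t Y) {
  uint64_t Q = X / Y; // VP_UDIV (VP_SDIV for the signed form)
  return X - Q * Y;   // VP_SUB of X and VP_MUL(Q, Y)
}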
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 143abc08eeea..af5ea1ce5f45 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -27,6 +27,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" +#include <numeric> + using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -975,6 +977,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi); + break; case ISD::MLOAD: SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); break; @@ -1006,23 +1011,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::ABS: + case ISD::VP_ABS: case ISD::BITREVERSE: + case ISD::VP_BITREVERSE: case ISD::BSWAP: + case ISD::VP_BSWAP: case ISD::CTLZ: + case ISD::VP_CTLZ: case ISD::CTTZ: + case ISD::VP_CTTZ: case ISD::CTLZ_ZERO_UNDEF: + case ISD::VP_CTLZ_ZERO_UNDEF: case ISD::CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ_ZERO_UNDEF: case ISD::CTPOP: - case ISD::FABS: + case ISD::VP_CTPOP: + case ISD::FABS: case ISD::VP_FABS: case ISD::FCEIL: + case ISD::VP_FCEIL: case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: case ISD::FFLOOR: + case ISD::VP_FFLOOR: case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: + case ISD::VP_FNEARBYINT: case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: @@ -1031,21 +1047,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::VP_FPTOSI: + case ISD::VP_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::VP_FPTOUI: + case ISD::VP_FP_TO_UINT: case ISD::FRINT: + case ISD::VP_FRINT: case ISD::FROUND: + case ISD::VP_FROUND: case ISD::FROUNDEVEN: + case ISD::VP_FROUNDEVEN: case ISD::FSIN: - case ISD::FSQRT: + case ISD::FSQRT: case ISD::VP_SQRT: case ISD::FTRUNC: + case ISD::VP_FROUNDTOZERO: case ISD::SINT_TO_FP: - case ISD::VP_SITOFP: + case ISD::VP_SINT_TO_FP: case ISD::TRUNCATE: case ISD::VP_TRUNCATE: case ISD::UINT_TO_FP: - case ISD::VP_UITOFP: + case ISD::VP_UINT_TO_FP: case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -1066,8 +1086,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FADD: case ISD::VP_FADD: case ISD::FSUB: case ISD::VP_FSUB: case ISD::FMUL: case ISD::VP_FMUL: - case ISD::FMINNUM: - case ISD::FMAXNUM: + case ISD::FMINNUM: case ISD::VP_FMINNUM: + case ISD::FMAXNUM: case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: case ISD::SDIV: case ISD::VP_SDIV: @@ -1083,10 +1103,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UREM: case ISD::VP_UREM: case ISD::SREM: case ISD::VP_SREM: case ISD::FREM: case ISD::VP_FREM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: + case ISD::SMIN: case ISD::VP_SMIN: + case ISD::SMAX: case ISD::VP_SMAX: + case ISD::UMIN: case ISD::VP_UMIN: + case ISD::UMAX: case ISD::VP_UMAX: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -1095,11 +1115,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case 
ISD::ROTR: + case ISD::VP_FCOPYSIGN: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: + case ISD::VP_FSHL: case ISD::FSHR: + case ISD::VP_FSHR: SplitVecRes_TernaryOp(N, Lo, Hi); break; @@ -1143,13 +1166,13 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { SDLoc DL(N); - unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8; + unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8; if (MemVT.isScalableVector()) { SDNodeFlags Flags; SDValue BytesIncrement = DAG.getVScale( DL, Ptr.getValueType(), - APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); + APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize)); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); Flags.setNoUnsignedWrap(true); if (ScaledOffset) @@ -1465,7 +1488,11 @@ void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDLoc DL(N); SDValue ArgLo, ArgHi; SDValue Test = N->getOperand(1); - GetSplitVector(N->getOperand(0), ArgLo, ArgHi); + SDValue FpValue = N->getOperand(0); + if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(FpValue, ArgLo, ArgHi); + else + std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -1900,7 +1927,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace()); else MPI = LD->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, @@ -1921,6 +1948,87 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, + SDValue &Lo, SDValue &Hi) { + assert(SLD->isUnindexed() && + "Indexed VP strided load during type legalization!"); + assert(SLD->getOffset().isUndef() && + "Unexpected indexed variable-length load offset"); + + SDLoc DL(SLD); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0)); + + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty); + + SDValue Mask = SLD->getMask(); + SDValue LoMask, HiMask; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, LoMask, HiMask); + else + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + } + + SDValue LoEVL, HiEVL; + std::tie(LoEVL, HiEVL) = + DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL); + + // Generate the low vp_strided_load + Lo = DAG.getStridedLoadVP( + SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL, + SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(), + LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad()); + + if (HiIsEmpty) { + // The high vp_strided_load has zero storage size. We therefore simply set + // it to the low vp_strided_load and rely on subsequent removal from the + // chain. + Hi = Lo; + } else { + // Generate the high vp_strided_load. 
+ // To calculate the high base address, we need to sum to the low base + // address stride number of bytes for each element already loaded by low, + // that is: Ptr = Ptr + (LoEVL * Stride) + EVT PtrVT = SLD->getBasePtr().getValueType(); + SDValue Increment = + DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL, + DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT)); + SDValue Ptr = + DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment); + + Align Alignment = SLD->getOriginalAlign(); + if (LoMemVT.isScalableVector()) + Alignment = commonAlignment( + Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()), + MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, + SLD->getAAInfo(), SLD->getRanges()); + + Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(), + HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(), + SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO, + SLD->isExpandingLoad()); + } + + // Build a factor node to remember that this load is independent of the + // other one. + SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(SLD, 1), Ch); +} + void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); @@ -1983,7 +2091,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace()); else MPI = MLD->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, @@ -2286,13 +2394,13 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end()); + SmallVector<int> OrigMask(N->getMask()); // Try to pack incoming shuffles/inputs. auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts, &DL](SmallVectorImpl<int> &Mask) { // Check if all inputs are shuffles of the same operands or non-shuffles. MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs; - for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) { + for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) { SDValue Input = Inputs[Idx]; auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode()); if (!Shuffle || @@ -2339,7 +2447,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear(); } // Check if any concat_vectors can be simplified. 
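// For the vp_strided_load splitting above: the high half must start where the
// low half stopped, i.e. HiBase = Base + LoEVL * Stride, with the stride in
// bytes. For example (addresses chosen for illustration only), with
// Base = 0x1000, Stride = 12 and LoEVL = 4, the low part reads elements at
// 0x1000, 0x100C, 0x1018 and 0x1024, and the high part starts at
// 0x1000 + 4 * 12 = 0x1030.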
- SmallBitVector UsedSubVector(2 * array_lengthof(Inputs)); + SmallBitVector UsedSubVector(2 * std::size(Inputs)); for (int &Idx : Mask) { if (Idx == UndefMaskElem) continue; @@ -2359,7 +2467,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } if (UsedSubVector.count() > 1) { SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs; - for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + for (unsigned I = 0; I < std::size(Inputs); ++I) { if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1)) continue; if (Pairs.empty() || Pairs.back().size() == 2) @@ -2403,7 +2511,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Try to remove extra shuffles (except broadcasts) and shuffles with the // reused operands. Changed = false; - for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + for (unsigned I = 0; I < std::size(Inputs); ++I) { auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode()); if (!Shuffle) continue; @@ -2495,15 +2603,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, NewElts](SmallVectorImpl<int> &Mask) { SetVector<SDValue> UniqueInputs; SetVector<SDValue> UniqueConstantInputs; - for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { - if (IsConstant(Inputs[I])) - UniqueConstantInputs.insert(Inputs[I]); - else if (!Inputs[I].isUndef()) - UniqueInputs.insert(Inputs[I]); + for (const auto &I : Inputs) { + if (IsConstant(I)) + UniqueConstantInputs.insert(I); + else if (!I.isUndef()) + UniqueInputs.insert(I); } // Adjust mask in case of reused inputs. Also, need to insert constant // inputs at first, otherwise it affects the final outcome. - if (UniqueInputs.size() != array_lengthof(Inputs)) { + if (UniqueInputs.size() != std::size(Inputs)) { auto &&UniqueVec = UniqueInputs.takeVector(); auto &&UniqueConstantVec = UniqueConstantInputs.takeVector(); unsigned ConstNum = UniqueConstantVec.size(); @@ -2541,8 +2649,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Build a shuffle mask for the output, discovering on the fly which // input vectors to use as shuffle operands. 
unsigned FirstMaskIdx = High * NewElts; - SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem); - copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin()); + SmallVector<int> Mask(NewElts * std::size(Inputs), UndefMaskElem); + copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin()); assert(!Output && "Expected default initialized initial value."); TryPeekThroughShufflesInputs(Mask); MakeUniqueInputs(Mask); @@ -2561,7 +2669,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, return SecondIteration; }; processShuffleMasks( - Mask, array_lengthof(Inputs), array_lengthof(Inputs), + Mask, std::size(Inputs), std::size(Inputs), /*NumOfUsedRegs=*/1, [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); }, [&Output, &DAG = DAG, NewVT, &DL, &Inputs, @@ -2707,6 +2815,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VP_STORE: Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo); + break; case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); break; @@ -2725,6 +2836,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::VP_SINT_TO_FP: + case ISD::VP_UINT_TO_FP: if (N->getValueType(0).bitsLT( N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); @@ -2737,6 +2850,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::VP_FP_TO_SINT: + case ISD::VP_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: @@ -2999,29 +3114,57 @@ SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); - SDValue Idx = N->getOperand(1); SDLoc dl(N); SDValue Lo, Hi; - if (SubVT.isScalableVector() != - N->getOperand(0).getValueType().isScalableVector()) - report_fatal_error("Extracting a fixed-length vector from an illegal " - "scalable vector is not yet supported"); - GetSplitVector(N->getOperand(0), Lo, Hi); - uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); + uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - if (IdxVal < LoElts) { - assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts && + if (IdxVal < LoEltsMin) { + assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin && "Extracted subvector crosses vector split!"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); - } else { + } else if (SubVT.isScalableVector() == + N->getOperand(0).getValueType().isScalableVector()) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getVectorIdxConstant(IdxVal - LoElts, dl)); - } + DAG.getVectorIdxConstant(IdxVal - LoEltsMin, dl)); + + // After this point the DAG node only permits extracting fixed-width + // subvectors from scalable vectors. + assert(SubVT.isFixedLengthVector() && + "Extracting scalable subvector from fixed-width unsupported"); + + // If the element type is i1 and we're not promoting the result, then we may + // end up loading the wrong data since the bits are packed tightly into + // bytes. 
For example, if we extract a v4i1 (legal) from a nxv4i1 (legal) + // type at index 4, then we will load a byte starting at index 0. + if (SubVT.getScalarType() == MVT::i1) + report_fatal_error("Don't know how to extract fixed-width predicate " + "subvector from a scalable predicate vector"); + + // Spill the vector to the stack. We should use the alignment for + // the smallest part. + SDValue Vec = N->getOperand(0); + EVT VecVT = Vec.getValueType(); + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); + + // Extract the subvector by loading the correct part. + StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx); + + return DAG.getLoad( + SubVT, dl, Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -3029,8 +3172,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Idx = N->getOperand(1); EVT VecVT = Vec.getValueType(); - if (isa<ConstantSDNode>(Idx)) { - uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) { + uint64_t IdxVal = Index->getZExtValue(); SDValue Lo, Hi; GetSplitVector(Vec, Lo, Hi); @@ -3167,11 +3310,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) { Alignment = commonAlignment(Alignment, - LoMemVT.getSizeInBits().getKnownMinSize() / 8); + LoMemVT.getSizeInBits().getKnownMinValue() / 8); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); } else MPI = N->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, @@ -3186,6 +3329,80 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, + unsigned OpNo) { + assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?"); + assert(N->getOffset().isUndef() && "Unexpected VP strided store offset"); + + SDLoc DL(N); + + SDValue Data = N->getValue(); + SDValue LoData, HiData; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Data, LoData, HiData); + else + std::tie(LoData, HiData) = DAG.SplitVector(Data, DL); + + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs( + N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty); + + SDValue Mask = N->getMask(); + SDValue LoMask, HiMask; + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) + SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask); + else if (getTypeAction(Mask.getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(Mask, LoMask, HiMask); + else + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + + SDValue LoEVL, HiEVL; + std::tie(LoEVL, HiEVL) = + DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), 
DL); + + // Generate the low vp_strided_store + SDValue Lo = DAG.getStridedStoreVP( + N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(), + N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(), + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); + + // If the high vp_strided_store has zero storage size, only the low + // vp_strided_store is needed. + if (HiIsEmpty) + return Lo; + + // Generate the high vp_strided_store. + // To calculate the high base address, we need to sum to the low base + // address stride number of bytes for each element already stored by low, + // that is: Ptr = Ptr + (LoEVL * Stride) + EVT PtrVT = N->getBasePtr().getValueType(); + SDValue Increment = + DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL, + DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT)); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment); + + Align Alignment = N->getOriginalAlign(); + if (LoMemVT.isScalableVector()) + Alignment = commonAlignment(Alignment, + LoMemVT.getSizeInBits().getKnownMinValue() / 8); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(N->getPointerInfo().getAddrSpace()), + MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, + N->getAAInfo(), N->getRanges()); + + SDValue Hi = DAG.getStridedStoreVP( + N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask, + HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed masked store of vector?"); @@ -3243,11 +3460,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) { Alignment = commonAlignment( - Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8); + Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); } else MPI = N->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, @@ -3593,7 +3810,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { // The result (and the first input) has a legal vector type, but the second // input needs splitting. 
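// The operand splitting below is sound because fcopysign acts independently
// on each lane; with types chosen purely for illustration:
//   v8f32 fcopysign(v8f32 A, v8f64 B)
//     -> concat_vectors(v4f32 fcopysign(A.lo, v4f64 B.lo),
//                       v4f32 fcopysign(A.hi, v4f64 B.hi))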
- return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); + + SDLoc DL(N); + + EVT LHSLoVT, LHSHiVT; + std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT)) + return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); + + SDValue LHSLo, LHSHi; + std::tie(LHSLo, LHSHi) = + DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT); + + SDValue RHSLo, RHSHi; + std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL); + + SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo); + SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { @@ -3683,6 +3919,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N)); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N)); + break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; @@ -3692,6 +3931,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_GATHER: Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N)); break; + case ISD::VECTOR_REVERSE: + Res = WidenVecRes_VECTOR_REVERSE(N); + break; case ISD::ADD: case ISD::VP_ADD: case ISD::AND: case ISD::VP_AND: @@ -3704,14 +3946,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::VP_SHL: case ISD::SRA: case ISD::VP_ASHR: case ISD::SRL: case ISD::VP_LSHR: - case ISD::FMINNUM: - case ISD::FMAXNUM: + case ISD::FMINNUM: case ISD::VP_FMINNUM: + case ISD::FMAXNUM: case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: + case ISD::SMIN: case ISD::VP_SMIN: + case ISD::SMAX: case ISD::VP_SMAX: + case ISD::UMIN: case ISD::VP_UMIN: + case ISD::UMAX: case ISD::VP_UMAX: case ISD::UADDSAT: case ISD::SADDSAT: case ISD::USUBSAT: @@ -3738,6 +3980,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FMUL: case ISD::VP_FDIV: case ISD::VP_FREM: + case ISD::VP_FCOPYSIGN: Res = WidenVecRes_Binary(N); break; @@ -3748,7 +3991,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // If the target has custom/legal support for the scalar FP intrinsic ops // (they are probably not destined to become libcalls), then widen those // like any other binary ops. - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::FADD: case ISD::FMUL: @@ -3809,17 +4052,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::VP_FPTOSI: + case ISD::VP_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::VP_FPTOUI: + case ISD::VP_FP_TO_UINT: case ISD::SIGN_EXTEND: case ISD::VP_SIGN_EXTEND: case ISD::SINT_TO_FP: - case ISD::VP_SITOFP: + case ISD::VP_SINT_TO_FP: case ISD::VP_TRUNCATE: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::VP_UITOFP: + case ISD::VP_UINT_TO_FP: case ISD::ZERO_EXTEND: case ISD::VP_ZERO_EXTEND: Res = WidenVecRes_Convert(N); @@ -3851,17 +4094,34 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // If the target has custom/legal support for the scalar FP intrinsic ops // (they are probably not destined to become libcalls), then widen those // like any other unary ops. 
- LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ABS: + case ISD::VP_ABS: case ISD::BITREVERSE: + case ISD::VP_BITREVERSE: case ISD::BSWAP: + case ISD::VP_BSWAP: case ISD::CTLZ: + case ISD::VP_CTLZ: case ISD::CTLZ_ZERO_UNDEF: + case ISD::VP_CTLZ_ZERO_UNDEF: case ISD::CTPOP: + case ISD::VP_CTPOP: case ISD::CTTZ: + case ISD::VP_CTTZ: case ISD::CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ_ZERO_UNDEF: case ISD::FNEG: case ISD::VP_FNEG: + case ISD::VP_FABS: + case ISD::VP_SQRT: + case ISD::VP_FCEIL: + case ISD::VP_FFLOOR: + case ISD::VP_FRINT: + case ISD::VP_FNEARBYINT: + case ISD::VP_FROUND: + case ISD::VP_FROUNDEVEN: + case ISD::VP_FROUNDTOZERO: case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: @@ -3869,7 +4129,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: + case ISD::VP_FSHL: case ISD::FSHR: + case ISD::VP_FSHR: Res = WidenVecRes_Ternary(N); break; } @@ -4005,7 +4267,7 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, ConcatOps[j] = UndefVal; } return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(ConcatOps.data(), NumOps)); + ArrayRef(ConcatOps.data(), NumOps)); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -4480,8 +4742,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { + SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Arg = GetWidenedVector(N->getOperand(0)); + if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + SDValue Arg = GetWidenedVector(FpValue); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, N->getFlags()); } @@ -4585,33 +4850,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { unsigned WidenSize = WidenVT.getSizeInBits(); unsigned InSize = InVT.getSizeInBits(); + unsigned InScalarSize = InVT.getScalarSizeInBits(); // x86mmx is not an acceptable vector element type, so don't try. - if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { + if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) { // Determine new input vector type. The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. EVT NewInVT; - unsigned NewNumElts = WidenSize / InSize; + unsigned NewNumParts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { - NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts); } if (TLI.isTypeLegal(NewInVT)) { SDValue NewVec; if (InVT.isVector()) { // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if + // the result could create a legal type but widening the input might + // make it an illegal type that might lead to repeatedly splitting the + // input and then widening it. To avoid this, we widen the input only if // it results in a legal type. 
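// A worked instance of the element-wise fallback added below, with types
// chosen for illustration only: widening a bitcast from v6i16 (96 bits) to a
// v4i32 result (128 bits) has WidenSize % InSize != 0 but 128 % 16 == 0, so
// the six i16 elements are extracted, padded with two undef i16s, rebuilt as
// a v8i16, and that vector is then bitcast to the widened v4i32.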
- SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT)); - Ops[0] = InOp; + if (WidenSize % InSize == 0) { + SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT)); + Ops[0] = InOp; - NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); + } else { + SmallVector<SDValue, 16> Ops; + DAG.ExtractVectorElements(InOp, Ops); + Ops.append(WidenSize / InScalarSize - Ops.size(), + DAG.getUNDEF(InVT.getVectorElementType())); + + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + } } else { NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp); } @@ -4768,7 +5043,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // nxv2i64 extract_subvector(nxv16i64, 8) // nxv2i64 extract_subvector(nxv16i64, 10) // undef) - unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts); + unsigned GCD = std::gcd(VTNumElts, WidenNumElts); assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " "down type's element count"); EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, @@ -4915,6 +5190,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { + SDLoc DL(N); + + // The mask should be widened as well + SDValue Mask = N->getMask(); + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided load"); + Mask = GetWidenedVector(Mask); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(Mask.getValueType().getVectorElementCount() == + WidenVT.getVectorElementCount() && + "Data and mask vectors should have the same number of elements"); + + SDValue Res = DAG.getStridedLoadVP( + N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(), + N->getBasePtr(), N->getOffset(), N->getStride(), Mask, + N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(), + N->isExpandingLoad()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); @@ -5316,6 +5618,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue OpValue = GetWidenedVector(N->getOperand(0)); + assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type"); + + SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue); + unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); + unsigned VTNumElts = VT.getVectorMinNumElements(); + unsigned IdxVal = WidenNumElts - VTNumElts; + + if (VT.isScalableVector()) { + // Try to split the 'Widen ReverseVal' into smaller extracts and concat the + // results together, e.g.(nxv6i64 -> nxv8i64) + // nxv8i64 vector_reverse + // <-> + // nxv8i64 concat( + // nxv2i64 extract_subvector(nxv8i64, 2) + // nxv2i64 extract_subvector(nxv8i64, 4) + // nxv2i64 extract_subvector(nxv8i64, 6) + // nxv2i64 undef) + + unsigned GCD = std::gcd(VTNumElts, WidenNumElts); + EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + ElementCount::getScalable(GCD)); + assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " + "down type's element count"); + SmallVector<SDValue> Parts; + unsigned i = 0; + for (; i < VTNumElts / GCD; ++i) + Parts.push_back( + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, ReverseVal, + DAG.getVectorIdxConstant(IdxVal + i * GCD, dl))); + for (; i < WidenNumElts / GCD; ++i) + Parts.push_back(DAG.getUNDEF(PartVT)); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); + } + + // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for + // fixed-vectors. 
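// A worked instance for fixed-length vectors, with sizes chosen for
// illustration only: reversing a v6i32 that was widened to v8i32 first
// reverses the widened value, which places original element i in lane 7 - i,
// and then shuffles with IdxVal = 8 - 6 = 2 and mask {2, 3, 4, 5, 6, 7, -1, -1},
// so lane 0 of the result is element 5, lane 1 is element 4, ..., lane 5 is
// element 0, and the two trailing lanes are undef.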
+ SmallVector<int, 16> Mask; + for (unsigned i = 0; i != VTNumElts; ++i) { + Mask.push_back(IdxVal + i); + } + for (unsigned i = VTNumElts; i != WidenNumElts; ++i) + Mask.push_back(-1); + + return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT), + Mask); +} + SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -5432,6 +5789,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo); + break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; @@ -5910,6 +6270,38 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { ST->isCompressingStore()); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N, + unsigned OpNo) { + assert((OpNo == 1 || OpNo == 4) && + "Can widen only data or mask operand of vp_strided_store"); + VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N); + SDValue Mask = SST->getMask(); + SDValue StVal = SST->getValue(); + SDLoc DL(N); + + if (OpNo == 1) + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided store"); + else + assert(getTypeAction(StVal.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided store"); + + StVal = GetWidenedVector(StVal); + Mask = GetWidenedVector(Mask); + + assert(StVal.getValueType().getVectorElementCount() == + Mask.getValueType().getVectorElementCount() && + "Data and mask vectors should have the same number of elements"); + + return DAG.getStridedStoreVP( + SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(), + SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(), + SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(), + SST->isCompressingStore()); +} + SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of mstore"); @@ -6127,7 +6519,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { unsigned WideElts = WideVT.getVectorMinNumElements(); if (WideVT.isScalableVector()) { - unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + unsigned GCD = std::gcd(OrigElts, WideElts); EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, ElementCount::getScalable(GCD)); SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); @@ -6164,7 +6556,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { unsigned WideElts = WideVT.getVectorMinNumElements(); if (WideVT.isScalableVector()) { - unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + unsigned GCD = std::gcd(OrigElts, WideElts); EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, ElementCount::getScalable(GCD)); SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); @@ -6223,12 +6615,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { // Align: If 0, don't allow use of a wider type // WidenEx: If Align is not 0, the amount additional we can load/store from. 
-static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, - unsigned Width, EVT WidenVT, - unsigned Align = 0, unsigned WidenEx = 0) { +static std::optional<EVT> findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align = 0, + unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); const bool Scalable = WidenVT.isScalableVector(); - unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); + unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue(); unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); unsigned AlignInBits = Align*8; @@ -6266,7 +6659,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, // Skip vector MVTs which don't match the scalable property of WidenVT. if (Scalable != MemVT.isScalableVector()) continue; - unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize(); + unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue(); auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); if ((Action == TargetLowering::TypeLegal || Action == TargetLowering::TypePromoteInteger) && @@ -6283,7 +6676,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, // Using element-wise loads and stores for widening operations is not // supported for scalable vectors if (Scalable) - return None; + return std::nullopt; return RetVT; } @@ -6348,9 +6741,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); // Find the vector type that can load from. - Optional<EVT> FirstVT = - findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); + std::optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign, + WidthDiff.getKnownMinValue()); if (!FirstVT) return SDValue(); @@ -6361,15 +6754,15 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, // Unless we're able to load in one instruction we must work out how to load // the remainder. if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) { - Optional<EVT> NewVT = FirstVT; + std::optional<EVT> NewVT = FirstVT; TypeSize RemainingWidth = LdWidth; TypeSize NewVTWidth = FirstVTWidth; do { RemainingWidth -= NewVTWidth; if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) { // The current type we are using is too large. Find a better size. - NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT, - LdAlign, WidthDiff.getKnownMinSize()); + NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(), + WidenVT, LdAlign, WidthDiff.getKnownMinValue()); if (!NewVT) return SDValue(); NewVTWidth = NewVT->getSizeInBits(); @@ -6387,7 +6780,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); if (!FirstVT->isVector()) { unsigned NumElts = - WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); @@ -6396,9 +6789,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, return LdOp; // TODO: We don't currently have any tests that exercise this code path. 
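// For GenWidenVectorLoads above, a hedged illustration of what the loop does:
// loading a v3i32 (96 bits) whose result is widened to v4i32 (128 bits), on a
// target where i64 and i32 loads are legal but alignment rules out a single
// 128-bit load, might be assembled from one i64 load followed by one i32
// load, with the pieces then concatenated/bitcast into the widened v4i32.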
- assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0); + assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); unsigned NumConcat = - WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); SmallVector<SDValue, 16> ConcatOps(NumConcat); SDValue UndefVal = DAG.getUNDEF(*FirstVT); ConcatOps[0] = LdOp; @@ -6461,9 +6854,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, TypeSize LdTySize = LdTy.getSizeInBits(); TypeSize NewLdTySize = NewLdTy.getSizeInBits(); assert(NewLdTySize.isScalable() == LdTySize.isScalable() && - NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize())); + NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue())); unsigned NumOps = - NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize(); + NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue(); SmallVector<SDValue, 16> WidenOps(NumOps); unsigned j = 0; for (; j != End-Idx; ++j) @@ -6481,11 +6874,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, if (WidenWidth == LdTy.getSizeInBits() * (End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(&ConcatOps[Idx], End - Idx)); + ArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector. unsigned NumOps = - WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize(); + WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue(); SmallVector<SDValue, 16> WidenOps(NumOps); SDValue UndefVal = DAG.getUNDEF(LdTy); { @@ -6584,8 +6977,8 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, while (StWidth.isNonZero()) { // Find the largest vector type we can store with. - Optional<EVT> NewVT = - findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + std::optional<EVT> NewVT = + findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT); if (!NewVT) return false; MemVTs.push_back({*NewVT, 0}); @@ -6620,11 +7013,11 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, } while (--Count); } else { // Cast the vector to the scalar type we can store. - unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); + unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type. - Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize(); + Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getVectorIdxConstant(Idx++, dl)); @@ -6636,7 +7029,7 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); } while (--Count); // Restore index back to be relative to the original widen element type. - Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth; + Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth; } } @@ -6685,7 +7078,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, unsigned InNumElts = InEC.getFixedValue(); unsigned WidenNumElts = WidenEC.getFixedValue(); - // Fall back to extract and build. + // Fall back to extract and build (+ mask, if padding with zeros). 
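// A worked instance of the change below, with widths chosen for illustration
// only: widening a v2i32 {x0, x1} to v4i32 with FillWithZeroes used to build
// {x0, x1, 0, 0} directly; it now builds {x0, x1, undef, undef} and ANDs that
// with the constant mask {-1, -1, 0, 0}, which yields the same zero padding.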
SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = NVT.getVectorElementType(); unsigned MinNumElts = std::min(WidenNumElts, InNumElts); @@ -6694,9 +7087,21 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getVectorIdxConstant(Idx, dl)); - SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : - DAG.getUNDEF(EltVT); - for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = FillVal; - return DAG.getBuildVector(NVT, dl, Ops); + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + + SDValue Widened = DAG.getBuildVector(NVT, dl, Ops); + if (!FillWithZeroes) + return Widened; + + assert(NVT.isInteger() && + "We expect to never want to FillWithZeroes for non-integral types."); + + SmallVector<SDValue, 16> MaskOps; + MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT)); + MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT)); + + return DAG.getNode(ISD::AND, dl, NVT, Widened, + DAG.getBuildVector(NVT, dl, MaskOps)); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 3ac2a7bddc5a..2d93adea6b9b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -426,10 +426,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, NumRes = 1; } else { const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + assert(!MCID.implicit_defs().empty() && + "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) + for (MCPhysReg ImpDef : MCID.implicit_defs()) { + if (Reg == ImpDef) break; ++NumRes; } @@ -526,11 +527,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, if (!Node->isMachineOpcode()) continue; const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); - if (!MCID.ImplicitDefs) - continue; - for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); - } + for (MCPhysReg Reg : MCID.implicit_defs()) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } return !LRegs.empty(); } @@ -777,8 +775,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, - DAG->getUseInstrRefDebugInfo()); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); DenseMap<SDValue, Register> VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8a04ce7535a1..c252046ef10b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/ScheduleDAG.h" #include 
"llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -302,6 +302,8 @@ private: } // end anonymous namespace +static constexpr unsigned RegSequenceCost = 1; + /// GetCostForDef - Looks up the register class and cost for a given definition. /// Typically this just means looking up the representative register class, /// but for untyped values (MVT::Untyped) it means inspecting the node's @@ -321,7 +323,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for CopyFromReg of untyped values. if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); RegClass = RC->getID(); Cost = 1; @@ -333,13 +335,14 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); RegClass = RC->getID(); - Cost = 1; + Cost = RegSequenceCost; return; } unsigned Idx = RegDefPos.GetIdx(); - const MCInstrDesc Desc = TII->get(Opcode); + const MCInstrDesc &Desc = TII->get(Opcode); const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); + assert(RC && "Not a valid register class"); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a // better way to determine it. @@ -1089,7 +1092,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { RemovePred(SU, Pred); AddPredQueued(NewSU, Pred); } - for (SDep D : NodeSuccs) { + for (SDep &D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1100,7 +1103,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { !D.isCtrl() && NewSU->NumRegDefsLeft > 0) --NewSU->NumRegDefsLeft; } - for (SDep D : ChainSuccs) { + for (SDep &D : ChainSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1204,11 +1207,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(NewSU); AddPredQueued(SuccSU, D); D.setSUnit(SU); - DelDeps.push_back(std::make_pair(SuccSU, D)); + DelDeps.emplace_back(SuccSU, D); } } - for (auto &DelDep : DelDeps) - RemovePred(DelDep.first, DelDep.second); + for (const auto &[DelSU, DelD] : DelDeps) + RemovePred(DelSU, DelD); AvailableQueue->updateNode(SU); AvailableQueue->addNode(NewSU); @@ -1242,17 +1245,17 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, SDep D = Succ; D.setSUnit(CopyToSU); AddPredQueued(SuccSU, D); - DelDeps.push_back(std::make_pair(SuccSU, Succ)); + DelDeps.emplace_back(SuccSU, Succ); } else { - // Avoid scheduling the def-side copy before other successors. Otherwise + // Avoid scheduling the def-side copy before other successors. Otherwise, // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. 
AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } - for (auto &DelDep : DelDeps) - RemovePred(DelDep.first, DelDep.second); + for (const auto &[DelSU, DelD] : DelDeps) + RemovePred(DelSU, DelD); SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); @@ -1281,10 +1284,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, NumRes = 1; } else { const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + assert(!MCID.implicit_defs().empty() && + "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) + for (MCPhysReg ImpDef : MCID.implicit_defs()) { + if (Reg == ImpDef) break; ++NumRes; } @@ -1381,8 +1385,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (Register::isPhysicalRegister(Reg)) + Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (Reg.isPhysical()) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -1419,7 +1423,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } if (const uint32_t *RegMask = getNodeRegMask(Node)) CheckForLiveRegDefMasked(SU, RegMask, - makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); @@ -1429,16 +1433,14 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // of %noreg. When the OptionalDef is set to a valid register, we need to // handle it in the same way as an ImplicitDef. for (unsigned i = 0; i < MCID.getNumDefs(); ++i) - if (MCID.OpInfo[i].isOptionalDef()) { + if (MCID.operands()[i].isOptionalDef()) { const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); - unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); + Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } - if (!MCID.ImplicitDefs) - continue; - for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + for (MCPhysReg Reg : MCID.implicit_defs()) + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } return !LRegs.empty(); @@ -1484,16 +1486,15 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; else dbgs() << printReg(LRegs[0], TRI); dbgs() << " SU #" << CurSU->NodeNum << '\n'); - std::pair<LRegsMapT::iterator, bool> LRegsPair = - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - if (LRegsPair.second) { + auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs); + if (LRegsInserted) { CurSU->isPending = true; // This SU is not in AvailableQueue right now. Interferences.push_back(CurSU); } else { assert(CurSU->isPending && "Interferences are pending"); // Update the interference with current live regs. 
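getPhysicalRegisterVT above now walks MCID.implicit_defs(), a bounded ArrayRef, instead of the old null-terminated getImplicitDefs() pointer. The C++20 sketch below shows the same counting loop over a std::span; countDefsBefore and the register numbers are made up for illustration.

// Standalone analogue of replacing the null-terminated ImplicitDefs pointer
// walk with a bounded range (ArrayRef in LLVM, std::span here). uint16_t
// stands in for MCPhysReg; the register numbers are invented.
#include <cstdint>
#include <iostream>
#include <span>

using PhysReg = uint16_t;

// Count how many result slots precede Reg in the def list, mirroring the
// NumRes computation in getPhysicalRegisterVT above.
static unsigned countDefsBefore(std::span<const PhysReg> ImplicitDefs,
                                PhysReg Reg, unsigned NumExplicitDefs) {
  unsigned NumRes = NumExplicitDefs;
  for (PhysReg ImpDef : ImplicitDefs) {   // no sentinel, no manual pointer bump
    if (Reg == ImpDef)
      break;
    ++NumRes;
  }
  return NumRes;
}

int main() {
  const PhysReg Defs[] = {10, 11, 12};
  std::cout << countDefsBefore(Defs, 12, 1) << "\n"; // prints 3
  return 0;
}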
- LRegsPair.first->second = LRegs; + LRegsIter->second = LRegs; } CurSU = AvailableQueue->pop(); } @@ -2302,6 +2303,16 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); continue; } + if (POpc == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = + cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + unsigned RCId = RC->getID(); + // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used + // here. Instead use the same constant as in GetCostForDef. + RegPressure[RCId] += RegSequenceCost; + continue; + } unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { MVT VT = PN->getSimpleValueType(i); @@ -2376,9 +2387,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { const SUnit *PredSU = Pred.getSUnit(); if (PredSU->getNode() && PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { - unsigned Reg = - cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (Reg.isVirtual()) { RetVal = true; continue; } @@ -2397,9 +2408,9 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (Succ.isCtrl()) continue; const SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { - unsigned Reg = - cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = + cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); + if (Reg.isVirtual()) { RetVal = true; continue; } @@ -2854,10 +2865,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const MCPhysReg *ImpDefs - = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); + ArrayRef<MCPhysReg> ImpDefs = + TII->get(SU->getNode()->getMachineOpcode()).implicit_defs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); - if(!ImpDefs && !RegMask) + if (ImpDefs.empty() && !RegMask) return false; for (const SDep &Succ : SU->Succs) { @@ -2871,14 +2882,14 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) return true; - if (ImpDefs) - for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries - // a topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) && - scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) - return true; + for (MCPhysReg ImpDef : ImpDefs) { + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
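The LRegsMap update above is rewritten around try_emplace plus a structured binding, so the insert-or-update decision falls out of a single lookup. A minimal analogue with std::map (standing in for the DenseMap-based LRegsMapT) is sketched below; the SU number and register values are arbitrary.

// Minimal sketch of the try_emplace + structured-binding pattern used for
// LRegsMap above.
#include <iostream>
#include <map>
#include <vector>

int main() {
  std::map<int, std::vector<unsigned>> LRegsMap; // SU number -> interfering regs

  std::vector<unsigned> LRegs = {3, 7};
  auto [It, Inserted] = LRegsMap.try_emplace(42, LRegs);
  if (Inserted)
    std::cout << "first interference recorded for SU#42\n";
  else
    It->second = LRegs;   // already pending: just refresh the live-reg set

  // A second call for the same SU takes the "update" path.
  LRegs = {5};
  auto [It2, Inserted2] = LRegsMap.try_emplace(42, LRegs);
  if (!Inserted2)
    It2->second = LRegs;
  std::cout << LRegsMap[42].size() << "\n"; // prints 1
  return 0;
}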
+ if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) && + scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) + return true; + } } } return false; @@ -2891,16 +2902,16 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); - assert(ImpDefs && "Caller should check hasPhysRegDefs"); + ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs(); + assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const MCPhysReg *SUImpDefs = - TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + ArrayRef<MCPhysReg> SUImpDefs = + TII->get(SUNode->getMachineOpcode()).implicit_defs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); - if (!SUImpDefs && !SURegMask) + if (SUImpDefs.empty() && !SURegMask) continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { MVT VT = N->getSimpleValueType(i); @@ -2908,13 +2919,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, continue; if (!N->hasAnyUseOfValue(i)) continue; - unsigned Reg = ImpDefs[i - NumDefs]; + MCPhysReg Reg = ImpDefs[i - NumDefs]; if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) return true; - if (!SUImpDefs) - continue; - for (;*SUImpDefs; ++SUImpDefs) { - unsigned SUReg = *SUImpDefs; + for (MCPhysReg SUReg : SUImpDefs) { if (TRI->regsOverlap(Reg, SUReg)) return true; } @@ -2968,8 +2976,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - Register::isVirtualRegister( - cast<RegisterSDNode>(N->getOperand(1))->getReg())) + cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) continue; SDNode *PredFrameSetup = nullptr; @@ -3015,8 +3022,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - Register::isVirtualRegister( - cast<RegisterSDNode>(N->getOperand(1))->getReg())) + cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) continue; // Perform checks on the successors of PredSU. 
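The hunks above also fold Register::isVirtualRegister(Reg) into the member call Reg.isVirtual(). The toy wrapper below shows that style of API; its high-bit encoding of virtual registers is invented for the example and is not how llvm::Register is actually encoded.

// Sketch of the Register::isVirtualRegister(Reg) -> Reg.isVirtual() style of
// cleanup: a thin value wrapper with member predicates.
#include <cassert>
#include <cstdint>

class Register {
  uint32_t Id;
  static constexpr uint32_t VirtualBit = 1u << 31; // illustrative encoding only

public:
  explicit Register(uint32_t Id) : Id(Id) {}
  bool isVirtual() const { return Id & VirtualBit; }
  bool isPhysical() const { return Id != 0 && !isVirtual(); }
  static Register makeVirtual(uint32_t N) { return Register(N | VirtualBit); }
};

int main() {
  Register Phys(5);
  Register Virt = Register::makeVirtual(0);
  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual() && !Virt.isPhysical());
  return 0;
}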
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 5166db033c62..2e1fd1e8a758 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -110,11 +110,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, + const TargetLowering &TLI, unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) + return; + if (Register::isVirtualRegister(Reg)) return; @@ -188,7 +192,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { "expected an unused glue value"); CloneNodeWithValues(N, DAG, - makeArrayRef(N->value_begin(), N->getNumValues() - 1)); + ArrayRef(N->value_begin(), N->getNumValues() - 1)); } /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. @@ -460,7 +464,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // Find all predecessors and successors of the group. for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && - TII->get(N->getMachineOpcode()).getImplicitDefs()) { + !TII->get(N->getMachineOpcode()).implicit_defs().empty()) { SU.hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) @@ -485,7 +489,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. - CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler @@ -843,8 +848,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, /// not necessarily refer to returned BB. The emitter may split blocks. 
MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, - DAG->getUseInstrRefDebugInfo()); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); DenseMap<SDValue, Register> VRBaseMap; DenseMap<SUnit*, Register> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; @@ -890,6 +894,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MI->setFlag(MachineInstr::MIFlag::NoMerge); } + if (MDNode *MD = DAG->getPCSections(Node)) + MI->setPCSections(MF, MD); + return MI; }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 195c0e6a836f..9a3609bc183b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -27,6 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -45,6 +45,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -92,6 +93,7 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} void SelectionDAG::DAGNodeDeletedListener::anchor() {} +void SelectionDAG::DAGNodeInsertedListener::anchor() {} #define DEBUG_TYPE "selectiondag" @@ -291,6 +293,43 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } +bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize, + bool Signed) { + assert(N->getValueType(0).isVector() && "Expected a vector!"); + + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); + if (EltSize <= NewEltSize) + return false; + + if (N->getOpcode() == ISD::ZERO_EXTEND) { + return (N->getOperand(0).getValueType().getScalarSizeInBits() <= + NewEltSize) && + !Signed; + } + if (N->getOpcode() == ISD::SIGN_EXTEND) { + return (N->getOperand(0).getValueType().getScalarSizeInBits() <= + NewEltSize) && + Signed; + } + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + if (!isa<ConstantSDNode>(Op)) + return false; + + APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize); + if (Signed && C.trunc(NewEltSize).sext(EltSize) != C) + return false; + if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C) + return false; + } + + return true; +} + bool ISD::allOperandsUndef(const SDNode *N) { // Return false if the node has no operands. 
// This is "logically inconsistent" with the definition of "all" but @@ -300,6 +339,10 @@ bool ISD::allOperandsUndef(const SDNode *N) { return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); }); } +bool ISD::isFreezeUndef(const SDNode *N) { + return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef(); +} + bool ISD::matchUnaryPredicate(SDValue Op, std::function<bool(ConstantSDNode *)> Match, bool AllowUndefs) { @@ -450,10 +493,10 @@ bool ISD::isVPReduction(unsigned Opcode) { } /// The operand position of the vector mask. -Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { +std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \ case ISD::VPSD: \ return MASKPOS; @@ -462,10 +505,10 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { } /// The operand position of the explicit vector length parameter. -Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { +std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ case ISD::VPSD: \ return EVLPOS; @@ -618,7 +661,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } } -static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, +static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC, SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); AddNodeIDValueTypes(ID, VTList); @@ -1018,6 +1061,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) { // If any of the SDDbgValue nodes refer to this SDNode, invalidate // them and forget about that node. DbgInfo->erase(N); + + // Invalidate extra info. + SDEI.erase(N); } #ifndef NDEBUG @@ -1230,18 +1276,18 @@ Align SelectionDAG::getEVTAlign(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), OptLevel(OL), - EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)), Root(getEntryNode()) { InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } void SelectionDAG::init(MachineFunction &NewMF, - OptimizationRemarkEmitter &NewORE, - Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence, - ProfileSummaryInfo *PSIin, - BlockFrequencyInfo *BFIin) { + OptimizationRemarkEmitter &NewORE, Pass *PassPtr, + const TargetLibraryInfo *LibraryInfo, + LegacyDivergenceAnalysis *Divergence, + ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin, + FunctionVarLocs const *VarLocs) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1252,6 +1298,7 @@ void SelectionDAG::init(MachineFunction &NewMF, DA = Divergence; PSI = PSIin; BFI = BFIin; + FnVarLocs = VarLocs; } SelectionDAG::~SelectionDAG() { @@ -1326,7 +1373,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); - SDCallSiteDbgInfo.clear(); + SDEI.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1341,7 +1388,8 @@ void SelectionDAG::clear() { SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? 
getNode(ISD::FP_EXTEND, DL, VT, Op) - : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); + : getNode(ISD::FP_ROUND, DL, VT, Op, + getIntPtrConstant(0, DL, /*isTarget=*/true)); } std::pair<SDValue, SDValue> @@ -1415,6 +1463,10 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { return getZeroExtendInReg(Op, DL, VT); } +SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) { + return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT)); @@ -1431,6 +1483,20 @@ SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val, return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL); } +SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, + SDValue Mask, SDValue EVL) { + return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL); +} + +SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, + SDValue Mask, SDValue EVL) { + if (VT.bitsGT(Op.getValueType())) + return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL); + if (VT.bitsLT(Op.getValueType())) + return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL); + return Op; +} + SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT) { if (!V) @@ -1544,7 +1610,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, "APInt size does not match type size!"); unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); ID.AddPointer(Elt); ID.AddBoolean(isO); void *IP = nullptr; @@ -1561,11 +1627,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isScalableVector()) - Result = getSplatVector(VT, DL, Result); - else if (VT.isVector()) - Result = getSplatBuildVector(VT, DL, Result); - + if (VT.isVector()) + Result = getSplat(VT, DL, Result); return Result; } @@ -1602,7 +1665,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, // we don't have issues with SNANs. unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; @@ -1617,10 +1680,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isScalableVector()) - Result = getSplatVector(VT, DL, Result); - else if (VT.isVector()) - Result = getSplatBuildVector(VT, DL, Result); + if (VT.isVector()) + Result = getSplat(VT, DL, Result); NewSDValueDbgMsg(Result, "Creating fp constant: ", this); return Result; } @@ -1661,7 +1722,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1679,7 +1740,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(FI); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -1697,7 +1758,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = nullptr; @@ -1721,7 +1782,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, : getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); ID.AddPointer(C); @@ -1748,7 +1809,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, Alignment = getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); @@ -1767,7 +1828,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, unsigned TargetFlags) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt); ID.AddInteger(Index); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1783,7 +1844,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt); ID.AddPointer(MBB); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -1894,7 +1955,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, "Index out of range"); // Copy the mask so we can do any needed cleanup. 
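getFrameIndex, getJumpTable, getConstantPool and the neighbouring getters above all follow the same shape: build a FoldingSet ID (now seeded with std::nullopt instead of None), return an existing node if one matches, otherwise create a new node and remember it. The rough sketch below mimics that uniquing flow with a std::unordered_map; TinyDAG, FrameIndexNode and the key encoding are invented for illustration.

// Rough standalone sketch of the "compute a key, reuse an existing node,
// otherwise create and remember one" CSE pattern used above. The map plays
// the role of the FoldingSet.
#include <iostream>
#include <memory>
#include <unordered_map>

struct FrameIndexNode {
  int FI;
  bool IsTarget;
};

class TinyDAG {
  std::unordered_map<long long, std::unique_ptr<FrameIndexNode>> Nodes;

public:
  FrameIndexNode *getFrameIndex(int FI, bool IsTarget) {
    long long Key = static_cast<long long>(FI) * 2 + (IsTarget ? 1 : 0);
    auto It = Nodes.find(Key);
    if (It != Nodes.end())
      return It->second.get();                        // CSE hit
    auto Node = std::make_unique<FrameIndexNode>(FrameIndexNode{FI, IsTarget});
    FrameIndexNode *Raw = Node.get();
    Nodes.emplace(Key, std::move(Node));              // remember for next time
    return Raw;
  }
};

int main() {
  TinyDAG DAG;
  FrameIndexNode *A = DAG.getFrameIndex(3, false);
  FrameIndexNode *B = DAG.getFrameIndex(3, false);
  std::cout << (A == B) << "\n"; // prints 1: the same node is reused
  return 0;
}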
- SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end()); + SmallVector<int, 8> MaskVec(Mask); // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { @@ -2050,7 +2111,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { EVT VT = SV.getValueType(0); - SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + SmallVector<int, 8> MaskVec(SV.getMask()); ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); @@ -2060,7 +2121,7 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); + AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt); ID.AddInteger(RegNo); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2075,7 +2136,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), std::nullopt); ID.AddPointer(RegMask); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2117,7 +2178,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddPointer(BA); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -2133,7 +2194,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, SDValue SelectionDAG::getSrcValue(const Value *V) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), std::nullopt); ID.AddPointer(V); void *IP = nullptr; @@ -2148,7 +2209,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), std::nullopt); ID.AddPointer(MD); void *IP = nullptr; @@ -2287,7 +2348,7 @@ SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { StackID = TFI->getStackIDForScalableVectors(); // The stack id gives an indication of whether the object is scalable or // not, so it's safe to pass in the minimum size here. - int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment, + int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment, false, nullptr, StackID); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } @@ -2305,8 +2366,9 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { assert(VT1Size.isScalable() == VT2Size.isScalable() && "Don't know how to choose the maximum size when creating a stack " "temporary"); - TypeSize Bytes = - VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size; + TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue() + ? 
VT1Size + : VT2Size; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); @@ -2380,34 +2442,34 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, default: break; case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, OpVT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpLessThan, dl, VT, OpVT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, OpVT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, VT, OpVT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || R==APFloat::cmpEqual, dl, VT, OpVT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpEqual, dl, VT, OpVT); case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, @@ -2459,48 +2521,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, return SDValue(); } -/// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by DemandedBits are used. -/// TODO: really we should be making this into the DAG equivalent of -/// SimplifyMultipleUseDemandedBits and not generate any new nodes. -SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { - EVT VT = V.getValueType(); - - if (VT.isScalableVector()) - return SDValue(); - - switch (V.getOpcode()) { - default: - return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this); - case ISD::Constant: { - const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); - APInt NewVal = CVal & DemandedBits; - if (NewVal != CVal) - return getConstant(NewVal, SDLoc(V), V.getValueType()); - break; - } - case ISD::SRL: - // Only look at single-use SRLs. - if (!V.getNode()->hasOneUse()) - break; - if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { - // See if we can recursively simplify the LHS. - unsigned Amt = RHSC->getZExtValue(); - - // Watch out for shift count overflow though. - if (Amt >= DemandedBits.getBitWidth()) - break; - APInt SrcDemandedBits = DemandedBits << Amt; - if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits( - V.getOperand(0), SrcDemandedBits, *this)) - return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, - V.getOperand(1)); - } - break; - } - return SDValue(); -} - /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. 
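FoldSetCC above swaps the LLVM_FALLTHROUGH macro for the standard [[fallthrough]] attribute at every intentional fall-through. A compilable miniature of that switch structure follows; the condition codes and classify function are invented, only the attribute usage is the point.

// Small self-contained example of the LLVM_FALLTHROUGH -> [[fallthrough]]
// change: the standard C++17 attribute documents an intentional fall-through
// and silences -Wimplicit-fallthrough.
#include <iostream>

static const char *classify(int CondCode, bool Unordered) {
  switch (CondCode) {
  case 0:                       // "SETEQ"-like: unordered compares give up
    if (Unordered)
      return "undef";
    [[fallthrough]];            // intentionally shares the ordered handling
  case 1:                       // "SETOEQ"-like
    return "ordered-equal-check";
  default:
    return "other";
  }
}

int main() {
  std::cout << classify(0, false) << "\n"; // ordered-equal-check
  std::cout << classify(0, true) << "\n";  // undef
  std::cout << classify(1, false) << "\n"; // ordered-equal-check
  return 0;
}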
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { @@ -2538,17 +2558,40 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, return Mask.isSubsetOf(computeKnownBits(V, Depth).One); } +APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { + EVT VT = Op.getValueType(); + assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!"); + + unsigned NumElts = VT.getVectorNumElements(); + assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask."); + + APInt KnownZeroElements = APInt::getNullValue(NumElts); + for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) { + if (!DemandedElts[EltIdx]) + continue; // Don't query elements that are not demanded. + APInt Mask = APInt::getOneBitSet(NumElts, EltIdx); + if (MaskedVectorIsZero(Op, Mask, Depth)) + KnownZeroElements.setBit(EltIdx); + } + return KnownZeroElements; +} + /// isSplatValue - Return true if the vector V has the same value -/// across all DemandedElts. For scalable vectors it does not make -/// sense to specify which elements are demanded or undefined, therefore -/// they are simply ignored. +/// across all DemandedElts. For scalable vectors, we don't know the +/// number of lanes at compile time. Instead, we use a 1 bit APInt +/// to represent a conservative value for all lanes; that is, that +/// one bit value is implicitly splatted across all lanes. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth) const { unsigned Opcode = V.getOpcode(); EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); + assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) && + "scalable demanded bits are ignored"); - if (!VT.isScalableVector() && !DemandedElts) + if (!DemandedElts) return false; // No demanded elts, better to assume we don't know anything. if (Depth >= MaxRecursionDepth) @@ -2585,7 +2628,8 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, default: if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) - return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth); + return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this, + Depth); break; } @@ -2730,11 +2774,11 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const { assert(VT.isVector() && "Vector type expected"); APInt UndefElts; - APInt DemandedElts; - - // For now we don't support this with scalable vectors. - if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts + = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2747,10 +2791,11 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { switch (Opcode) { default: { APInt UndefElts; - APInt DemandedElts; - - if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. 
This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts + = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); if (isSplatValue(V, DemandedElts, UndefElts)) { if (VT.isScalableVector()) { @@ -2773,9 +2818,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { SplatIdx = 0; return V; case ISD::VECTOR_SHUFFLE: { - if (VT.isScalableVector()) - return SDValue(); - + assert(!VT.isScalableVector()); // Check if this is a shuffle node doing a splat. // TODO - remove this and rely purely on SelectionDAG::isSplatValue, // getTargetVShiftNode currently struggles without the splat source. @@ -2890,14 +2933,10 @@ const APInt *SelectionDAG::getValidMaximumShiftAmountConstant( KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); - // TOOD: Until we have a plan for how to represent demanded elements for - // scalable vectors, we can just bail out for now. - if (Op.getValueType().isScalableVector()) { - unsigned BitWidth = Op.getScalarValueSizeInBits(); - return KnownBits(BitWidth); - } - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return computeKnownBits(Op, DemandedElts, Depth); @@ -2912,11 +2951,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known(BitWidth); // Don't know anything. - // TOOD: Until we have a plan for how to represent demanded elements for - // scalable vectors, we can just bail out for now. - if (Op.getValueType().isScalableVector()) - return Known; - if (auto *C = dyn_cast<ConstantSDNode>(Op)) { // We know all of the bits for a constant! return KnownBits::makeConstant(C->getAPIntValue()); @@ -2931,7 +2965,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known2; unsigned NumElts = DemandedElts.getBitWidth(); - assert((!Op.getValueType().isVector() || + assert((!Op.getValueType().isFixedLengthVector() || NumElts == Op.getValueType().getVectorNumElements()) && "Unexpected vector size"); @@ -2943,7 +2977,17 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::MERGE_VALUES: return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, Depth + 1); + case ISD::SPLAT_VECTOR: { + SDValue SrcOp = Op.getOperand(0); + assert(SrcOp.getValueSizeInBits() >= BitWidth && + "Expected SPLAT_VECTOR implicit truncation"); + // Implicitly truncate the bits to match the official semantics of + // SPLAT_VECTOR. + Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth); + break; + } case ISD::BUILD_VECTOR: + assert(!Op.getValueType().isScalableVector()); // Collect the known bits that are shared by every demanded vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { @@ -2969,32 +3013,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } break; case ISD::VECTOR_SHUFFLE: { + assert(!Op.getValueType().isScalableVector()); // Collect the known bits that are shared by every vector element referenced // by the shuffle. 
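The isSplatValue and getSplatSourceVector comments above describe the new convention for scalable vectors: demanded elements are tracked as a single bit that implicitly covers every lane, while fixed-length vectors keep one bit per lane. The tiny helper below, using a plain uint64_t in place of APInt, only shows how the width of that demanded mask is chosen; allOnesDemanded is an illustrative name.

// Standalone illustration of the demanded-elements convention above: one bit
// per lane for fixed vectors, a single all-lanes bit for scalable vectors.
#include <cstdint>
#include <iostream>

static uint64_t allOnesDemanded(bool IsScalable, unsigned NumElts) {
  unsigned Bits = IsScalable ? 1 : NumElts;          // 1 bit == whole vector
  return Bits >= 64 ? ~0ull : ((1ull << Bits) - 1);
}

int main() {
  std::cout << std::hex;
  std::cout << allOnesDemanded(/*IsScalable=*/false, 8) << "\n"; // ff
  std::cout << allOnesDemanded(/*IsScalable=*/true, 8) << "\n";  // 1
  return 0;
}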
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt DemandedLHS, DemandedRHS; const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); - for (unsigned i = 0; i != NumElts; ++i) { - if (!DemandedElts[i]) - continue; - - int M = SVN->getMaskElt(i); - if (M < 0) { - // For UNDEF elements, we don't know anything about the common state of - // the shuffle result. - Known.resetAll(); - DemandedLHS.clearAllBits(); - DemandedRHS.clearAllBits(); - break; - } + if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, + DemandedLHS, DemandedRHS)) + break; - if ((unsigned)M < NumElts) - DemandedLHS.setBit((unsigned)M % NumElts); - else - DemandedRHS.setBit((unsigned)M % NumElts); - } // Known bits are the values that are shared by every demanded element. + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); @@ -3011,6 +3041,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::CONCAT_VECTORS: { + if (Op.getValueType().isScalableVector()) + break; // Split DemandedElts and test each of the demanded subvectors. Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVectorVT = Op.getOperand(0).getValueType(); @@ -3031,6 +3063,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::INSERT_SUBVECTOR: { + if (Op.getValueType().isScalableVector()) + break; // Demand any elements from the subvector and the remainder from the src its // inserted into. SDValue Src = Op.getOperand(0); @@ -3058,7 +3092,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); // Bail until we can represent demanded elements for scalable vectors. - if (Src.getValueType().isScalableVector()) + if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector()) break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); @@ -3067,6 +3101,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::SCALAR_TO_VECTOR: { + if (Op.getValueType().isScalableVector()) + break; // We know about scalar_to_vector as much as we know about it source, // which becomes the first element of otherwise unknown vector. if (DemandedElts != 1) @@ -3080,6 +3116,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::BITCAST: { + if (Op.getValueType().isScalableVector()) + break; + SDValue N0 = Op.getOperand(0); EVT SubVT = N0.getValueType(); unsigned SubBitWidth = SubVT.getScalarSizeInBits(); @@ -3335,13 +3374,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); // Collect lo/hi source values and concatenate. - // TODO: Would a KnownBits::concatBits helper be useful? 
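The VECTOR_SHUFFLE case above drops its hand-rolled mask walk in favour of a getShuffleDemandedElts helper that splits the demanded result lanes between the two shuffle sources and reports failure on undef lanes. The sketch below captures that splitting logic with plain bitmasks; splitShuffleDemanded only approximates the real helper and assumes both sources have the mask's element count.

// Minimal sketch of splitting demanded result lanes through a shuffle mask.
// Plain bitmasks stand in for APInt; a mask entry of -1 means "undef lane".
#include <cstdint>
#include <iostream>
#include <vector>

static bool splitShuffleDemanded(const std::vector<int> &Mask,
                                 uint64_t DemandedElts, uint64_t &DemandedLHS,
                                 uint64_t &DemandedRHS) {
  unsigned NumElts = Mask.size();
  DemandedLHS = DemandedRHS = 0;
  for (unsigned I = 0; I != NumElts; ++I) {
    if (!(DemandedElts & (1ull << I)))
      continue;                              // lane not demanded
    int M = Mask[I];
    if (M < 0)
      return false;                          // undef lane: caller gives up
    if (static_cast<unsigned>(M) < NumElts)
      DemandedLHS |= 1ull << M;              // lane comes from the first source
    else
      DemandedRHS |= 1ull << (M - NumElts);  // ...or from the second source
  }
  return true;
}

int main() {
  // Shuffle of two 4-element vectors; result lanes 0 and 2 are demanded.
  std::vector<int> Mask = {4, 1, 7, -1};
  uint64_t L, R;
  if (splitShuffleDemanded(Mask, 0b0101, L, R))
    std::cout << std::hex << "LHS=" << L << " RHS=" << R << "\n"; // LHS=0 RHS=9
  return 0;
}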
unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = Known.anyext(LoBits + HiBits); - Known.insertBits(Known2, LoBits); + Known = Known2.concat(Known); // Collect shift amount. Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); @@ -3372,7 +3409,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. unsigned PossibleTZ = Known2.countMaxTrailingZeros(); - unsigned LowBits = Log2_32(PossibleTZ) + 1; + unsigned LowBits = llvm::bit_width(PossibleTZ); Known.Zero.setBitsFrom(LowBits); break; } @@ -3381,7 +3418,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. unsigned PossibleLZ = Known2.countMaxLeadingZeros(); - unsigned LowBits = Log2_32(PossibleLZ) + 1; + unsigned LowBits = llvm::bit_width(PossibleLZ); Known.Zero.setBitsFrom(LowBits); break; } @@ -3389,7 +3426,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we know some of the bits are zero, they can't be one. unsigned PossibleOnes = Known2.countMaxPopulation(); - Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); + Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes)); break; } case ISD::PARITY: { @@ -3403,7 +3440,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (ISD::isNON_EXTLoad(LD) && Cst) { // Determine any common known bits from the loaded constant pool value. Type *CstTy = Cst->getType(); - if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { + if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() && + !Op.getValueType().isScalableVector()) { // If its a vector splat, then we can (quickly) reuse the scalar path. // NOTE: We assume all elements match and none are UNDEF. if (CstTy->isVectorTy()) { @@ -3453,12 +3491,32 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned MemBits = VT.getScalarSizeInBits(); Known.Zero.setBitsFrom(MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - if (LD->getExtensionType() == ISD::NON_EXTLOAD) - computeKnownBitsFromRangeMetadata(*Ranges, Known); + EVT VT = LD->getValueType(0); + + // TODO: Handle for extending loads + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + if (VT.isVector()) { + // Handle truncation to the first demanded element. + // TODO: Figure out which demanded elements are covered + if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) + break; + + // Handle the case where a load has a vector type, but scalar memory + // with an attached range. 
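The CTTZ/CTLZ/CTPOP bounds above are rewritten from Log2_32(x) + 1 to llvm::bit_width(x). The short program below checks that equivalence using C++20's std::bit_width, which has the same meaning for the values involved; the zero case is guarded separately in the sketch because the plain log2-plus-one spelling is not defined for zero.

// Quick standalone check of the Log2_32(x) + 1 -> bit_width(x) rewrite: for
// x > 0 both give the number of bits needed to represent x, and bit_width(0)
// is 0.
#include <bit>
#include <cassert>
#include <cstdint>

static unsigned log2PlusOne(uint32_t X) {
  // 31 - countl_zero(X) matches the old Log2_32 for nonzero X.
  return X == 0 ? 0 : (31 - std::countl_zero(X)) + 1;
}

int main() {
  for (uint32_t X : {1u, 2u, 3u, 7u, 8u, 1000u, 0x80000000u})
    assert(static_cast<unsigned>(std::bit_width(X)) == log2PlusOne(X));
  assert(static_cast<unsigned>(std::bit_width(0u)) == 0u);
  return 0;
}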
+ EVT MemVT = LD->getMemoryVT(); + KnownBits KnownFull(MemVT.getSizeInBits()); + + computeKnownBitsFromRangeMetadata(*Ranges, KnownFull); + Known = KnownFull.trunc(BitWidth); + } else + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } } break; } case ISD::ZERO_EXTEND_VECTOR_INREG: { + if (Op.getValueType().isScalableVector()) + break; EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); @@ -3471,6 +3529,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::SIGN_EXTEND_VECTOR_INREG: { + if (Op.getValueType().isScalableVector()) + break; EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); @@ -3487,6 +3547,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::ANY_EXTEND_VECTOR_INREG: { + if (Op.getValueType().isScalableVector()) + break; EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); @@ -3506,7 +3568,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - Known = computeKnownBits(Op.getOperand(0), Depth+1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known.Zero |= (~InMask); Known.One &= (~Known.Zero); break; @@ -3538,7 +3600,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SUB: case ISD::SUBC: { assert(Op.getResNo() == 0 && @@ -3566,7 +3628,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ADD: case ISD::ADDC: case ISD::ADDE: { @@ -3652,6 +3714,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::INSERT_VECTOR_ELT: { + if (Op.getValueType().isScalableVector()) + break; + // If we know the element index, split the demand between the // source vector and the inserted element, otherwise assume we need // the original demanded vector elements and the value. @@ -3781,7 +3846,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: @@ -3814,10 +3879,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, default: if (Opcode < ISD::BUILTIN_OP_END) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: + // TODO: Probably okay to remove after audit; here to reduce change size + // in initial enablement patch for scalable vectors + if (Op.getValueType().isScalableVector()) + break; + // Allow the target to implement this method for its nodes. 
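The load handling above now builds KnownBits at the memory type's width from !range metadata and truncates to the demanded element. The simplified sketch below shows only the basic idea of turning an unsigned, non-wrapping value range into known-zero high bits; it is far less general than computeKnownBitsFromRangeMetadata and the function name is invented.

// Standalone sketch: if a loaded value is known to lie in [Lo, Hi), every bit
// position at or above bit_width(Hi - 1) must be zero. Plain integers stand in
// for KnownBits.
#include <bit>
#include <cstdint>
#include <iostream>

// Returns a mask of bits that are provably zero for any value in [Lo, Hi).
static uint64_t knownZeroFromRange(uint64_t Lo, uint64_t Hi) {
  if (Hi == 0 || Hi <= Lo)
    return 0;                                   // empty or odd range: claim nothing
  unsigned UsedBits = std::bit_width(Hi - 1);   // bits any member may occupy
  return UsedBits >= 64 ? 0 : ~((1ull << UsedBits) - 1);
}

int main() {
  // Values known to be in [0, 100): bits 7 and up are zero.
  std::cout << std::hex << knownZeroFromRange(0, 100) << "\n";
  return 0;
}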
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); break; @@ -3914,11 +3984,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); - // TODO: Assume we don't know anything for now. - if (VT.isScalableVector()) - return 1; - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ComputeNumSignBits(Op, DemandedElts, Depth); @@ -3941,7 +4010,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. - if (!DemandedElts || VT.isScalableVector()) + if (!DemandedElts) return 1; // No demanded elts, better to assume we don't know anything. unsigned Opcode = Op.getOpcode(); @@ -3956,7 +4025,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::MERGE_VALUES: return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, Depth + 1); + case ISD::SPLAT_VECTOR: { + // Check if the sign bits of source go down as far as the truncated value. + unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits(); + unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (NumSrcSignBits > (NumSrcBits - VTBits)) + return NumSrcSignBits - (NumSrcBits - VTBits); + break; + } case ISD::BUILD_VECTOR: + assert(!VT.isScalableVector()); Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { if (!DemandedElts[i]) @@ -3979,22 +4057,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::VECTOR_SHUFFLE: { // Collect the minimum number of sign bits that are shared by every vector // element referenced by the shuffle. - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + APInt DemandedLHS, DemandedRHS; const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); - for (unsigned i = 0; i != NumElts; ++i) { - int M = SVN->getMaskElt(i); - if (!DemandedElts[i]) - continue; - // For UNDEF elements, we don't know anything about the common state of - // the shuffle result. 
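ComputeNumSignBits above gains a SPLAT_VECTOR case: when the splatted scalar is wider than the lane type, each lane keeps NumSrcSignBits minus the truncated-away bits, provided that difference is still positive. The small program below checks that arithmetic on a concrete 32-to-16-bit truncation; numSignBits32 and numSignBits16 are local helpers written for the example.

// Standalone check of the SPLAT_VECTOR sign-bit rule: a lane produced by
// implicit truncation keeps NumSrcSignBits - (NumSrcBits - VTBits) sign bits.
#include <bit>
#include <cassert>
#include <cstdint>

static unsigned numSignBits32(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  return V < 0 ? std::countl_one(U) : std::countl_zero(U);
}

static unsigned numSignBits16(int16_t V) {
  uint16_t U = static_cast<uint16_t>(V);
  return V < 0 ? std::countl_one(U) : std::countl_zero(U);
}

int main() {
  int32_t Src = -3;                              // 30 sign bits in 32 bits
  int16_t Lane = static_cast<int16_t>(Src);      // implicit truncation to 16
  unsigned Predicted = numSignBits32(Src) - (32 - 16);
  assert(Predicted == numSignBits16(Lane));      // both are 14
  return 0;
}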
- if (M < 0) - return 1; - if ((unsigned)M < NumElts) - DemandedLHS.setBit((unsigned)M % NumElts); - else - DemandedRHS.setBit((unsigned)M % NumElts); - } + if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, + DemandedLHS, DemandedRHS)) + return 1; + Tmp = std::numeric_limits<unsigned>::max(); if (!!DemandedLHS) Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); @@ -4010,6 +4079,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } case ISD::BITCAST: { + if (VT.isScalableVector()) + break; SDValue N0 = Op.getOperand(0); EVT SrcVT = N0.getValueType(); unsigned SrcBits = SrcVT.getScalarSizeInBits(); @@ -4067,6 +4138,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); return std::max(Tmp, Tmp2); case ISD::SIGN_EXTEND_VECTOR_INREG: { + if (VT.isScalableVector()) + break; SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); @@ -4284,6 +4357,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::EXTRACT_ELEMENT: { + if (VT.isScalableVector()) + break; const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); const int BitWidth = Op.getValueSizeInBits(); const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; @@ -4294,9 +4369,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result - return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth); } case ISD::INSERT_VECTOR_ELT: { + if (VT.isScalableVector()) + break; // If we know the element index, split the demand between the // source vector and the inserted element, otherwise assume we need // the original demanded vector elements and the value. @@ -4327,6 +4404,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } case ISD::EXTRACT_VECTOR_ELT: { + assert(!VT.isScalableVector()); SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); EVT VecVT = InVec.getValueType(); @@ -4365,6 +4443,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); } case ISD::CONCAT_VECTORS: { + if (VT.isScalableVector()) + break; // Determine the minimum number of sign bits across all demanded // elts of the input vectors. Early out if the result is already 1. Tmp = std::numeric_limits<unsigned>::max(); @@ -4383,6 +4463,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } case ISD::INSERT_SUBVECTOR: { + if (VT.isScalableVector()) + break; // Demand any elements from the subvector and the remainder from the src its // inserted into. 
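The EXTRACT_ELEMENT case above replaces std::max(std::min(...), 0) with std::clamp. The few lines below simply confirm the two spellings agree over a range of inputs, including out-of-range ones.

// Tiny check of the std::max(std::min(x, Hi), Lo) -> std::clamp(x, Lo, Hi)
// rewrite used for the EXTRACT_ELEMENT case.
#include <algorithm>
#include <cassert>

int main() {
  for (int X : {-5, 0, 7, 64, 200}) {
    int Old = std::max(std::min(X, 64), 0);
    int New = std::clamp(X, 0, 64);
    assert(Old == New);
  }
  return 0;
}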
SDValue Src = Op.getOperand(0); @@ -4406,6 +4488,34 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + if (const MDNode *Ranges = LD->getRanges()) { + if (DemandedElts != 1) + break; + + ConstantRange CR = getConstantRangeFromMetadata(*Ranges); + if (VTBits > CR.getBitWidth()) { + switch (LD->getExtensionType()) { + case ISD::SEXTLOAD: + CR = CR.signExtend(VTBits); + break; + case ISD::ZEXTLOAD: + CR = CR.zeroExtend(VTBits); + break; + default: + break; + } + } + + if (VTBits != CR.getBitWidth()) + break; + return std::min(CR.getSignedMin().getNumSignBits(), + CR.getSignedMax().getNumSignBits()); + } + + break; + } case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: @@ -4453,7 +4563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // We only need to handle vectors - computeKnownBits should handle // scalar cases. Type *CstTy = Cst->getType(); - if (CstTy->isVectorTy() && + if (CstTy->isVectorTy() && !VT.isScalableVector() && (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() && VTBits == CstTy->getScalarSizeInBits()) { Tmp = VTBits; @@ -4488,10 +4598,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Opcode == ISD::INTRINSIC_WO_CHAIN || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) { - unsigned NumBits = + // TODO: This can probably be removed once target code is audited. This + // is here purely to reduce patch size and review complexity. + if (!VT.isScalableVector()) { + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); - if (NumBits > 1) - FirstAnswer = std::max(FirstAnswer, NumBits); + if (NumBits > 1) + FirstAnswer = std::max(FirstAnswer, NumBits); + } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -4547,6 +4661,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, return true; switch (Opcode) { + case ISD::VALUETYPE: + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + return true; + case ISD::UNDEF: return PoisonOnly; @@ -4562,9 +4681,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, } return true; - // TODO: Search for noundef attributes from library functions. + // TODO: Search for noundef attributes from library functions. - // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. + // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. default: // Allow the target to implement this method for its nodes. @@ -4575,7 +4694,94 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, break; } - return false; + // If Op can't create undef/poison and none of its operands are undef/poison + // then Op is never undef/poison. + // NOTE: TargetNodes should handle this in themselves in + // isGuaranteedNotToBeUndefOrPoisonForTargetNode. + return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true, + Depth) && + all_of(Op->ops(), [&](SDValue V) { + return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1); + }); +} + +bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, + bool ConsiderFlags, + unsigned Depth) const { + // TODO: Assume we don't know anything for now. + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + return true; + + APInt DemandedElts = VT.isVector() + ? 
APInt::getAllOnes(VT.getVectorNumElements()) + : APInt(1, 1); + return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags, + Depth); +} + +bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, + bool PoisonOnly, bool ConsiderFlags, + unsigned Depth) const { + // TODO: Assume we don't know anything for now. + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + return true; + + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { + case ISD::AssertSext: + case ISD::AssertZext: + case ISD::FREEZE: + case ISD::INSERT_SUBVECTOR: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ROTL: + case ISD::ROTR: + case ISD::FSHL: + case ISD::FSHR: + case ISD::BSWAP: + case ISD::CTPOP: + case ISD::BITREVERSE: + case ISD::PARITY: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::BITCAST: + case ISD::BUILD_VECTOR: + return false; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + // Matches hasPoisonGeneratingFlags(). + return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || + Op->getFlags().hasNoUnsignedWrap()); + + case ISD::SHL: + // If the max shift amount isn't in range, then the shift can create poison. + if (!getValidMaximumShiftAmountConstant(Op, DemandedElts)) + return true; + + // Matches hasPoisonGeneratingFlags(). + return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || + Op->getFlags().hasNoUnsignedWrap()); + + default: + // Allow the target to implement this method for its nodes. + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) + return TLI->canCreateUndefOrPoisonForTargetNode( + Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth); + break; + } + + // Be conservative and return true. + return true; } bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { @@ -4598,7 +4804,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (Depth >= MaxRecursionDepth) return false; // Limit search depth. - // TODO: Handle vectors. // If the value is a constant, we can obviously see if it is a NaN or not. 
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { return !C->getValueAPF().isNaN() || @@ -4613,7 +4818,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FDIV: case ISD::FREM: case ISD::FSIN: - case ISD::FCOS: { + case ISD::FCOS: + case ISD::FMA: + case ISD::FMAD: { if (SNaN) return true; // TODO: Need isKnownNeverInfinity @@ -4650,14 +4857,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return true; - case ISD::FMA: - case ISD::FMAD: { - if (SNaN) - return true; - return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && - isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && - isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); - } case ISD::FSQRT: // Need is known positive case ISD::FLOG: case ISD::FLOG2: @@ -4696,6 +4895,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::EXTRACT_VECTOR_ELT: { return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); } + case ISD::BUILD_VECTOR: { + for (const SDValue &Opnd : Op->ops()) + if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1)) + return false; + return true; + } default: if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || @@ -4938,7 +5143,7 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, /// Gets or creates the specified node. SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, getVTList(VT), None); + AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); @@ -4980,7 +5185,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::TRUNCATE: if (C->isOpaque()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ZERO_EXTEND: return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -5166,7 +5371,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FREEZE: assert(VT == Operand.getValueType() && "Unexpected VT!"); - if (isGuaranteedNotToBeUndefOrPoison(Operand)) + if (isGuaranteedNotToBeUndefOrPoison(Operand, /*PoisonOnly*/ false, + /*Depth*/ 1)) return Operand; break; case ISD::TokenFactor: @@ -5428,8 +5634,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; } -static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, - const APInt &C2) { +static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { switch (Opcode) { case ISD::ADD: return C1 + C2; case ISD::SUB: return C1 - C2; @@ -5505,7 +5711,23 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); } } - return llvm::None; + return std::nullopt; +} + +// Handle constant folding with UNDEF. +// TODO: Handle more cases. 
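// Illustrative note, not part of the upstream patch: the helper defined below
// only folds the two cases where an undef operand still fixes the result.
// With hypothetical APInt values X and U (U undef):
//
//   FoldValueWithUndef(ISD::AND, X, /*IsUndef1=*/false, U, /*IsUndef2=*/true)
//       -> APInt::getZero(BitWidth)   // and(x, undef) -> 0
//   FoldValueWithUndef(ISD::MUL, X, /*IsUndef1=*/false, U, /*IsUndef2=*/true)
//       -> APInt::getZero(BitWidth)   // mul(x, undef) -> 0
//
// Any other opcode with an undef operand yields std::nullopt, i.e. no fold.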
+static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1, + bool IsUndef1, const APInt &C2, + bool IsUndef2) { + if (!(IsUndef1 || IsUndef2)) + return FoldValue(Opcode, C1, C2); + + // Fold and(x, undef) -> 0 + // Fold mul(x, undef) -> 0 + if (Opcode == ISD::AND || Opcode == ISD::MUL) + return APInt::getZero(C1.getBitWidth()); + + return std::nullopt; } SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, @@ -5581,7 +5803,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (C1->isOpaque() || C2->isOpaque()) return SDValue(); - Optional<APInt> FoldAttempt = + std::optional<APInt> FoldAttempt = FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); if (!FoldAttempt) return SDValue(); @@ -5608,7 +5830,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, ElementCount NumElts = VT.getVectorElementCount(); // See if we can fold through bitcasted integer ops. - // TODO: Can we handle undef elements? if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[0].getOpcode() == ISD::BITCAST && @@ -5624,11 +5845,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, SmallVector<APInt> RawBits1, RawBits2; BitVector UndefElts1, UndefElts2; if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) && - BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) && - UndefElts1.none() && UndefElts2.none()) { + BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) { SmallVector<APInt> RawBits; for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) { - Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); + std::optional<APInt> Fold = FoldValueWithUndef( + Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]); if (!Fold) break; RawBits.push_back(*Fold); @@ -5823,7 +6044,7 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true)) if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef()) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::FADD: case ISD::FMUL: @@ -5882,11 +6103,11 @@ void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, // Canonicalize: // binop(const, nonconst) -> binop(nonconst, const) - bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); - bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); - bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); - bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); - if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) + SDNode *N1C = isConstantIntBuildVectorOrConstantInt(N1); + SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2); + SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((N1C && !N2C) || (N1CFP && !N2CFP)) std::swap(N1, N2); // Canonicalize: @@ -5995,6 +6216,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT)); } break; + case ISD::ABDS: + case ISD::ABDU: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; case ISD::SMIN: case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); @@ -6034,12 +6261,12 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, const APInt &ShiftImm = N2C->getAPIntValue(); return getVScale(DL, VT, MulImm << ShiftImm); } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SRA: case ISD::SRL: if (SDValue V = simplifyShift(N1, N2)) return V; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ROTL: case ISD::ROTR: assert(VT == N1.getValueType() && @@ -6329,7 +6556,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ADD: case ISD::SUB: case ISD::UDIV: @@ -6484,6 +6711,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Dest and insert subvector source types must match!"); assert(VT.isVector() && N2VT.isVector() && "Insert subvector VTs must be vectors!"); + assert(VT.getVectorElementType() == N2VT.getVectorElementType() && + "Insert subvector VTs must have the same element type!"); assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && "Cannot insert a scalable vector into a fixed length vector!"); assert((VT.isScalableVector() != N2VT.isScalableVector() || @@ -6674,10 +6903,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, if (Offset.isScalable()) Index = getVScale(DL, Base.getValueType(), - APInt(Base.getValueSizeInBits().getFixedSize(), - Offset.getKnownMinSize())); + APInt(Base.getValueSizeInBits().getFixedValue(), + Offset.getKnownMinValue())); else - Index = getConstant(Offset.getFixedSize(), DL, VT); + Index = getConstant(Offset.getFixedValue(), DL, VT); return getMemBasePlusOffset(Base, Index, DL, Flags); } @@ -6794,7 +7023,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Align NewAlign = DL.getABITypeAlign(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment which may conflict with optimizations such as tail call + // optimization. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) @@ -6986,6 +7216,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); Align NewAlign = DL.getABITypeAlign(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment which may conflict with optimizations such as tail call + // optimization. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign.previous(); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) @@ -7094,7 +7333,17 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty); + const DataLayout &DL = DAG.getDataLayout(); + Align NewAlign = DL.getABITypeAlign(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment which may conflict with optimizations such as tail call + // optimization. 
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign.previous(); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) @@ -7562,6 +7811,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || Opcode == ISD::ATOMIC_LOAD_FMIN || + Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || + Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); @@ -8816,12 +9067,12 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) return CondC->isZero() ? F : T; - // TODO: This should simplify VSELECT with constant condition using something - // like this (but check boolean contents to be complete?): - // if (ISD::isBuildVectorAllOnes(Cond.getNode())) - // return T; - // if (ISD::isBuildVectorAllZeros(Cond.getNode())) - // return F; + // TODO: This should simplify VSELECT with non-zero constant condition using + // something like this (but check boolean contents to be complete?): + if (ConstantSDNode *CondC = isConstOrConstSplat(Cond, /*AllowUndefs*/ false, + /*AllowTruncation*/ true)) + if (CondC->isZero()) + return F; // select ?, T, T --> T if (T == F) @@ -9177,7 +9428,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, None); + return getNode(Opcode, DL, VTList, std::nullopt); } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, @@ -9444,7 +9695,7 @@ void SelectionDAG::setNodeMemRefs(MachineSDNode *N, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, None); + return SelectNodeTo(N, MachineOpc, VTs, std::nullopt); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -9485,7 +9736,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, None); + return SelectNodeTo(N, MachineOpc, VTs, std::nullopt); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -9652,7 +9903,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, None); + return getMachineNode(Opcode, dl, VTs, std::nullopt); } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, @@ -10091,6 +10342,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { // Preserve Debug Values transferDbgValues(FromN, To); + // Preserve extra info. + copyExtraInfo(From, To.getNode()); // Iterate over all the existing uses of From. New uses will be added // to the beginning of the use list, which we avoid visiting. 
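The alignment clamp that the patch adds to the memmove and memset lowering above (mirroring the existing memcpy path) is the same small loop in each place: when the target cannot dynamically realign the stack, the candidate alignment is stepped down, Align::previous() halving it, until it no longer exceeds the natural stack alignment. A rough standalone sketch of that idea, using plain integers instead of llvm::Align and a hypothetical NaturalStackAlign constant:

#include <cassert>
#include <cstdint>

// Stand-ins for DataLayout::exceedsNaturalStackAlignment() and a target's
// natural stack alignment; the real code operates on llvm::Align values.
constexpr uint64_t NaturalStackAlign = 16;

static bool exceedsNaturalStackAlignment(uint64_t A) {
  return A > NaturalStackAlign;
}

// Step a candidate power-of-two alignment down (halving, like
// Align::previous()) until it would no longer force dynamic stack
// realignment, but never below the alignment we already have.
static uint64_t clampAlign(uint64_t NewAlign, uint64_t Alignment) {
  assert((NewAlign & (NewAlign - 1)) == 0 && "alignments are powers of two");
  while (NewAlign > Alignment && exceedsNaturalStackAlignment(NewAlign))
    NewAlign /= 2;
  return NewAlign;
}

int main() {
  assert(clampAlign(/*NewAlign=*/64, /*Alignment=*/8) == 16);
  assert(clampAlign(/*NewAlign=*/8, /*Alignment=*/4) == 8);
}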
@@ -10152,6 +10405,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { assert((i < To->getNumValues()) && "Invalid To location"); transferDbgValues(SDValue(From, i), SDValue(To, i)); } + // Preserve extra info. + copyExtraInfo(From, To); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -10194,9 +10449,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { if (From->getNumValues() == 1) // Handle the simple case efficiently. return ReplaceAllUsesWith(SDValue(From, 0), To[0]); - // Preserve Debug Info. - for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) { + // Preserve Debug Info. transferDbgValues(SDValue(From, i), To[i]); + // Preserve extra info. + copyExtraInfo(From, To[i].getNode()); + } // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -10249,6 +10507,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ // Preserve Debug Info. transferDbgValues(From, To); + copyExtraInfo(From.getNode(), To.getNode()); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -10402,6 +10661,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, return ReplaceAllUsesOfValueWith(*From, *To); transferDbgValues(*From, *To); + copyExtraInfo(From->getNode(), To->getNode()); // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the @@ -10636,6 +10896,67 @@ bool llvm::isMinSignedConstant(SDValue V) { return Const != nullptr && Const->isMinSignedValue(); } +bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, + unsigned OperandNo) { + // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). + // TODO: Target-specific opcodes could be added. + if (auto *Const = isConstOrConstSplat(V)) { + switch (Opcode) { + case ISD::ADD: + case ISD::OR: + case ISD::XOR: + case ISD::UMAX: + return Const->isZero(); + case ISD::MUL: + return Const->isOne(); + case ISD::AND: + case ISD::UMIN: + return Const->isAllOnes(); + case ISD::SMAX: + return Const->isMinSignedValue(); + case ISD::SMIN: + return Const->isMaxSignedValue(); + case ISD::SUB: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return OperandNo == 1 && Const->isZero(); + case ISD::UDIV: + case ISD::SDIV: + return OperandNo == 1 && Const->isOne(); + } + } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { + switch (Opcode) { + case ISD::FADD: + return ConstFP->isZero() && + (Flags.hasNoSignedZeros() || ConstFP->isNegative()); + case ISD::FSUB: + return OperandNo == 1 && ConstFP->isZero() && + (Flags.hasNoSignedZeros() || !ConstFP->isNegative()); + case ISD::FMUL: + return ConstFP->isExactlyValue(1.0); + case ISD::FDIV: + return OperandNo == 1 && ConstFP->isExactlyValue(1.0); + case ISD::FMINNUM: + case ISD::FMAXNUM: { + // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. + EVT VT = V.getValueType(); + const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT); + APFloat NeutralAF = !Flags.hasNoNaNs() + ? APFloat::getQNaN(Semantics) + : !Flags.hasNoInfs() + ? 
APFloat::getInf(Semantics) + : APFloat::getLargest(Semantics); + if (Opcode == ISD::FMAXNUM) + NeutralAF.changeSign(); + + return ConstFP->isExactlyValue(NeutralAF); + } + } + } + return false; +} + SDValue llvm::peekThroughBitcasts(SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); @@ -10666,6 +10987,16 @@ bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, bool AllowTruncation) { + EVT VT = N.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? APInt::getAllOnes(VT.getVectorMinNumElements()) + : APInt(1, 1); + return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation); +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, + bool AllowUndefs, + bool AllowTruncation) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; @@ -10683,34 +11014,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { BitVector UndefElements; - ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - - // BuildVectors can truncate their operands. Ignore that case here unless - // AllowTruncation is set. - if (CN && (UndefElements.none() || AllowUndefs)) { - EVT CVT = CN->getValueType(0); - EVT NSVT = N.getValueType().getScalarType(); - assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); - if (AllowTruncation || (CVT == NSVT)) - return CN; - } - } - - return nullptr; -} - -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, - bool AllowUndefs, - bool AllowTruncation) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); // BuildVectors can truncate their operands. Ignore that case here unless // AllowTruncation is set. + // TODO: Look into whether we should allow UndefElements in non-DemandedElts if (CN && (UndefElements.none() || AllowUndefs)) { EVT CVT = CN->getValueType(0); EVT NSVT = N.getValueType().getScalarType(); @@ -10724,21 +11032,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, } ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); - if (CN && (UndefElements.none() || AllowUndefs)) - return CN; - } - - if (N.getOpcode() == ISD::SPLAT_VECTOR) - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) - return CN; - - return nullptr; + EVT VT = N.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? 
APInt::getAllOnes(VT.getVectorMinNumElements()) + : APInt(1, 1); + return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs); } ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, @@ -10751,10 +11049,15 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, BitVector UndefElements; ConstantFPSDNode *CN = BV->getConstantFPSplatNode(DemandedElts, &UndefElements); + // TODO: Look into whether we should allow UndefElements in non-DemandedElts if (CN && (UndefElements.none() || AllowUndefs)) return CN; } + if (N.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) + return CN; + return nullptr; } @@ -10808,7 +11111,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. // TODO: Make MachineMemOperands aware of scalable vectors. - assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() && + assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() && "Size mismatch!"); } @@ -11221,7 +11524,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; if (LD->getChain() != Base->getChain()) return false; - EVT VT = LD->getValueType(0); + EVT VT = LD->getMemoryVT(); if (VT.getSizeInBits() / 8 != Bytes) return false; @@ -11234,8 +11537,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } -/// InferPtrAlignment - Infer alignment of a load / store address. Return None -/// if it cannot be inferred. +/// InferPtrAlignment - Infer alignment of a load / store address. Return +/// std::nullopt if it cannot be inferred. MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV = nullptr; @@ -11267,7 +11570,7 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return None; + return std::nullopt; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -11690,30 +11993,30 @@ bool BuildVectorSDNode::isConstant() const { return true; } -Optional<std::pair<APInt, APInt>> +std::optional<std::pair<APInt, APInt>> BuildVectorSDNode::isConstantSequence() const { unsigned NumOps = getNumOperands(); if (NumOps < 2) - return None; + return std::nullopt; if (!isa<ConstantSDNode>(getOperand(0)) || !isa<ConstantSDNode>(getOperand(1))) - return None; + return std::nullopt; unsigned EltSize = getValueType(0).getScalarSizeInBits(); APInt Start = getConstantOperandAPInt(0).trunc(EltSize); APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start; if (Stride.isZero()) - return None; + return std::nullopt; for (unsigned i = 2; i < NumOps; ++i) { if (!isa<ConstantSDNode>(getOperand(i))) - return None; + return std::nullopt; APInt Val = getConstantOperandAPInt(i).trunc(EltSize); if (Val != (Start + (Stride * i))) - return None; + return std::nullopt; } return std::make_pair(Start, Stride); @@ -11847,6 +12150,18 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL, } } +void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { + assert(From && To && "Invalid SDNode; empty source SDValue?"); + auto I = SDEI.find(From); + if (I == SDEI.end()) + return; + + // Use of operator[] on the DenseMap may cause an insertion, which invalidates + // the iterator, hence the need to make a copy to prevent a use-after-free. 
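// Illustrative note, not part of the upstream patch: the hazard the explicit
// copy below avoids, written out in generic DenseMap terms:
//
//   auto I = SDEI.find(From);
//   SDEI[To] = I->second;   // operator[] may insert and rehash the map, so
//                           // the mapped value reached through I can be read
//                           // after its storage has been invalidated.
//
//   NodeExtraInfo Copy = I->second; // safe: copy out through I first,
//   SDEI[To] = std::move(Copy);     // then mutate the map.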
+ NodeExtraInfo Copy = I->second; + SDEI[To] = std::move(Copy); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index d236433f6fb4..a432d8e92bca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -85,9 +85,9 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, } bool BaseIndexOffset::computeAliasing(const SDNode *Op0, - const Optional<int64_t> NumBytes0, + const std::optional<int64_t> NumBytes0, const SDNode *Op1, - const Optional<int64_t> NumBytes1, + const std::optional<int64_t> NumBytes1, const SelectionDAG &DAG, bool &IsAlias) { BaseIndexOffset BasePtr0 = match(Op0, DAG); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ecdaef0442da..0bdfdac6a65f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,8 +15,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -27,10 +25,12 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -63,6 +63,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -99,6 +100,7 @@ #include <cstddef> #include <iterator> #include <limits> +#include <optional> #include <tuple> using namespace llvm; @@ -148,18 +150,18 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional<CallingConv::ID> CC); + std::optional<CallingConv::ID> CC); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). 
-static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V, - Optional<CallingConv::ID> CC = None, - Optional<ISD::NodeType> AssertOp = None) { +static SDValue +getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, + std::optional<CallingConv::ID> CC = std::nullopt, + std::optional<ISD::NodeType> AssertOp = std::nullopt) { // Let the target assemble the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts, @@ -180,8 +182,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. - unsigned RoundParts = - (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts; + unsigned RoundParts = llvm::bit_floor(NumParts); unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); @@ -320,7 +321,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional<CallingConv::ID> CallConv) { + std::optional<CallingConv::ID> CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const bool IsABIRegCopy = CallConv.has_value(); @@ -397,10 +398,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // If the element type of the source/dest vectors are the same, but the - // parts vector has more elements than the value vector, then we have a - // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the - // elements we want. + // If the parts vector has more elements than the value vector, then we + // have a vector widening case (e.g. <2 x float> -> <4 x float>). + // Extract the elements we want. if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && @@ -414,6 +414,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, DAG.getVectorIdxConstant(0, DL)); if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } // Promoted vector extract @@ -447,12 +449,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) { - if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits()) + unsigned ValueSize = ValueSVT.getSizeInBits(); + if (ValueSize == PartEVT.getSizeInBits()) { Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val); - else + } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) { + // It's possible a scalar floating point type gets softened to integer and + // then promoted to a larger integer. If PartEVT is the larger integer + // we need to truncate it and then bitcast to the FP type. 
+ assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types"); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); + Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val); + Val = DAG.getBitcast(ValueSVT, Val); + } else { Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + } } return DAG.getBuildVector(ValueVT, DL, Val); @@ -461,16 +473,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - Optional<CallingConv::ID> CallConv); + std::optional<CallingConv::ID> CallConv); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, - const Value *V, - Optional<CallingConv::ID> CallConv = None, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { +static void +getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, const Value *V, + std::optional<CallingConv::ID> CallConv = std::nullopt, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { // Let the target split the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT, @@ -555,7 +567,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, // The number of parts is not a power of 2. Split off and copy the tail. assert(PartVT.isInteger() && ValueVT.isInteger() && "Do not know what to expand to!"); - unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundParts = llvm::bit_floor(NumParts); unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, @@ -643,7 +655,7 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - Optional<CallingConv::ID> CallConv) { + std::optional<CallingConv::ID> CallConv) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -678,7 +690,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT); Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT); } else { - if (ValueVT.getVectorElementCount().isScalar()) { + // Don't extract an integer from a float vector. This can happen if the + // FP type gets softened to integer and then promoted. The promotion + // prevents it from being picked up by the earlier bitcast case. 
+ if (ValueVT.getVectorElementCount().isScalar() && + (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) { Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getVectorIdxConstant(0, DL)); } else { @@ -703,8 +719,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned NumRegs; if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); } else { NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -718,7 +734,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && "Mixing scalable and fixed vectors when copying in parts"); - Optional<ElementCount> DestEltCnt; + std::optional<ElementCount> DestEltCnt; if (IntermediateVT.isVector()) DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; @@ -786,13 +802,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, - EVT valuevt, Optional<CallingConv::ID> CC) + EVT valuevt, std::optional<CallingConv::ID> CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), RegCount(1, regs.size()), CallConv(CC) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - Optional<CallingConv::ID> CC) { + std::optional<CallingConv::ID> CC) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); CallConv = CC; @@ -800,11 +816,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, for (EVT ValueVT : ValueVTs) { unsigned NumRegs = isABIMangled() - ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT) + ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT) : TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = isABIMangled() - ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT) + ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT) : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); @@ -831,10 +847,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; - MVT RegisterVT = - isABIMangled() ? TLI.getRegisterTypeForCallingConv( - *DAG.getContext(), CallConv.value(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = isABIMangled() + ? TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), *CallConv, RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -914,10 +930,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumParts = RegCount[Value]; - MVT RegisterVT = - isABIMangled() ? TLI.getRegisterTypeForCallingConv( - *DAG.getContext(), CallConv.value(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = isABIMangled() + ? 
TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), *CallConv, RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -1025,8 +1041,10 @@ RegsForValue::getRegsAndSizes() const { } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, + AssumptionCache *ac, const TargetLibraryInfo *li) { AA = aa; + AC = ac; GFI = gfi; LibInfo = li; Context = DAG.getContext(); @@ -1117,18 +1135,57 @@ void SelectionDAGBuilder::visit(const Instruction &I) { HandlePHINodesInSuccessorBlocks(I.getParent()); } + // Add SDDbgValue nodes for any var locs here. Do so before updating + // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. + if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) { + // Add SDDbgValue nodes for any var locs here. Do so before updating + // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. + for (auto It = FnVarLocs->locs_begin(&I), End = FnVarLocs->locs_end(&I); + It != End; ++It) { + auto *Var = FnVarLocs->getDILocalVariable(It->VariableID); + dropDanglingDebugInfo(Var, It->Expr); + if (!handleDebugValue(It->V, Var, It->Expr, It->DL, SDNodeOrder, + /*IsVariadic=*/false)) + addDanglingDebugInfo(It, SDNodeOrder); + } + } + // Increase the SDNodeOrder if dealing with a non-debug instruction. if (!isa<DbgInfoIntrinsic>(I)) ++SDNodeOrder; CurInst = &I; + // Set inserted listener only if required. + bool NodeInserted = false; + std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener; + MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections); + if (PCSectionsMD) { + InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>( + DAG, [&](SDNode *) { NodeInserted = true; }); + } + visit(I.getOpcode(), I); if (!I.isTerminator() && !HasTailCall && !isa<GCStatepointInst>(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); + // Handle metadata. + if (PCSectionsMD) { + auto It = NodeMap.find(&I); + if (It != NodeMap.end()) { + DAG.addPCSections(It->second.getNode(), PCSectionsMD); + } else if (NodeInserted) { + // This should not happen; if it does, don't let it go unnoticed so we can + // fix it. Relevant visit*() function is probably missing a setValue(). + errs() << "warning: loosing !pcsections metadata [" + << I.getModule()->getName() << "]\n"; + LLVM_DEBUG(I.dump()); + assert(false); + } + } + CurInst = nullptr; } @@ -1148,8 +1205,13 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc, + unsigned Order) { + DanglingDebugInfoMap[VarLoc->V].emplace_back(VarLoc, Order); +} + void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, - DebugLoc DL, unsigned Order) { + unsigned Order) { // We treat variadic dbg_values differently at this stage. if (DI->hasArgList()) { // For variadic dbg_values we will now insert an undef. 
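The !pcsections handling added to SelectionDAGBuilder::visit above hinges on a listener that is installed only for the duration of a single visit and whose callback merely records that a node was created, so that dropped metadata can be diagnosed afterwards. A minimal, self-contained sketch of that RAII pattern; SelectionDAG::DAGNodeInsertedListener is the real class, while the Graph type and everything else here are stand-ins:

#include <cassert>
#include <functional>
#include <vector>

// Stand-in for SelectionDAG: owns a stack of node-insertion callbacks.
struct Graph {
  std::vector<std::function<void(int)>> Listeners;
  void insertNode(int N) {
    for (auto &L : Listeners)
      L(N);
  }
};

// RAII listener: registered only for the scope of one visit, in the spirit of
// SelectionDAG::DAGNodeInsertedListener in the hunk above.
class ScopedInsertedListener {
  Graph &G;

public:
  ScopedInsertedListener(Graph &G, std::function<void(int)> CB) : G(G) {
    G.Listeners.push_back(std::move(CB));
  }
  ~ScopedInsertedListener() { G.Listeners.pop_back(); }
};

int main() {
  Graph G;
  bool NodeInserted = false;
  {
    ScopedInsertedListener L(G, [&](int) { NodeInserted = true; });
    G.insertNode(42); // visiting the instruction creates a node
  }
  assert(NodeInserted && "otherwise the metadata would be silently dropped");
}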
@@ -1161,7 +1223,7 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, } SDDbgValue *SDV = DAG.getDbgValueList( DI->getVariable(), DI->getExpression(), Locs, {}, - /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true); + /*IsIndirect=*/false, DI->getDebugLoc(), Order, /*IsVariadic=*/true); DAG.AddDbgValue(SDV, /*isParameter=*/false); } else { // TODO: Dangling debug info will eventually either be resolved or produce @@ -1171,18 +1233,18 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, assert(DI->getNumVariableLocationOps() == 1 && "DbgValueInst without an ArgList should have a single location " "operand."); - DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order); + DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order); } } void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, const DIExpression *Expr) { auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { - const DbgValueInst *DI = DDI.getDI(); - DIVariable *DanglingVariable = DI->getVariable(); - DIExpression *DanglingExpr = DI->getExpression(); + DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs()); + DIExpression *DanglingExpr = DDI.getExpression(); if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { - LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI) + << "\n"); return true; } return false; @@ -1211,15 +1273,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; for (auto &DDI : DDIV) { - const DbgValueInst *DI = DDI.getDI(); - assert(!DI->hasArgList() && "Not implemented for variadic dbg_values"); - assert(DI && "Ill-formed DanglingDebugInfo"); - DebugLoc dl = DDI.getdl(); + DebugLoc DL = DDI.getDebugLoc(); unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - DILocalVariable *Variable = DI->getVariable(); - DIExpression *Expr = DI->getExpression(); - assert(Variable->isValidLocationForIntrinsic(dl) && + DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs()); + DIExpression *Expr = DDI.getExpression(); + assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); SDDbgValue *SDV; if (Val.getNode()) { @@ -1229,10 +1288,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, // in the first place we should not be more successful here). Unless we // have some test case that prove this to be correct we should avoid // calling EmitFuncArgumentDbgValue here. 
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL, FuncArgumentDbgValueKind::Value, Val)) { - LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" - << DbgSDNodeOrder << "] for:\n " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI) + << "\n"); LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); // Increase the SDNodeOrder for the DbgValue here to make sure it is // inserted after the definition of Val when emitting the instructions @@ -1241,17 +1300,17 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs() << "changing SDNodeOrder from " << DbgSDNodeOrder << " to " << ValSDNodeOrder << "\n"); - SDV = getDbgValue(Val, Variable, Expr, dl, + SDV = getDbgValue(Val, Variable, Expr, DL, std::max(DbgSDNodeOrder, ValSDNodeOrder)); DAG.AddDbgValue(SDV, false); } else - LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI - << "in EmitFuncArgumentDbgValue\n"); + LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " + << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n"); } else { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType()); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n"); + auto Undef = UndefValue::get(V->getType()); auto SDV = - DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder); + DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder); DAG.AddDbgValue(SDV, false); } } @@ -1263,21 +1322,19 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // state of `handleDebugValue`, we need know specifically which values were // invalid, so that we attempt to salvage only those values when processing // a DIArgList. - assert(!DDI.getDI()->hasArgList() && - "Not implemented for variadic dbg_values"); - Value *V = DDI.getDI()->getValue(0); - DILocalVariable *Var = DDI.getDI()->getVariable(); - DIExpression *Expr = DDI.getDI()->getExpression(); - DebugLoc DL = DDI.getdl(); - DebugLoc InstDL = DDI.getDI()->getDebugLoc(); + Value *V = DDI.getVariableLocationOp(0); + Value *OrigV = V; + DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs()); + DIExpression *Expr = DDI.getExpression(); + DebugLoc DL = DDI.getDebugLoc(); unsigned SDOrder = DDI.getSDNodeOrder(); + // Currently we consider only dbg.value intrinsics -- we tell the salvager // that DW_OP_stack_value is desired. - assert(isa<DbgValueInst>(DDI.getDI())); bool StackValue = true; // Can this Value can be encoded without any further work? - if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false)) + if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) return; // Attempt to salvage back through as many instructions as possible. Bail if @@ -1306,10 +1363,10 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. 
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, - /*IsVariadic=*/false)) { - LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " - << *DDI.getDI() << "\nBy stripping back to:\n " << *V); + if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) { + LLVM_DEBUG( + dbgs() << "Salvaged debug location info for:\n " << *Var << "\n" + << *OrigV << "\nBy stripping back to:\n " << *V << "\n"); return; } } @@ -1317,21 +1374,18 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // This was the final opportunity to salvage this debug information, and it // couldn't be done. Place an undef DBG_VALUE at this location to terminate // any earlier variable location. - auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType()); - auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); + assert(OrigV && "V shouldn't be null"); + auto *Undef = UndefValue::get(OrigV->getType()); + auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); - - LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI() - << "\n"); - LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0) + LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI) << "\n"); } bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var, - DIExpression *Expr, DebugLoc dl, - DebugLoc InstDL, unsigned Order, - bool IsVariadic) { + DIExpression *Expr, DebugLoc DbgLoc, + unsigned Order, bool IsVariadic) { if (Values.empty()) return true; SmallVector<SDDbgOperand> LocationOps; @@ -1344,6 +1398,13 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, continue; } + // Look through IntToPtr constants. + if (auto *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) { + LocationOps.emplace_back(SDDbgOperand::fromConst(CE->getOperand(0))); + continue; + } + // If the Value is a frame index, we can create a FrameIndex debug value // without relying on the DAG at all. if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { @@ -1362,7 +1423,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, if (N.getNode()) { // Only emit func arg dbg value for non-variadic dbg.values for now. if (!IsVariadic && - EmitFuncArgumentDbgValue(V, Var, Expr, dl, + EmitFuncArgumentDbgValue(V, Var, Expr, DbgLoc, FuncArgumentDbgValueKind::Value, N)) return true; if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { @@ -1391,7 +1452,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, // they're parameters, and they are parameters of the current function. We // need to let them dangle until they get an SDNode. bool IsParamOfFunc = - isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt(); + isa<Argument>(V) && Var->isParameter() && !DbgLoc.getInlinedAt(); if (IsParamOfFunc) return false; @@ -1404,7 +1465,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, // If this is a PHI node, it may be split up into several MI PHI nodes // (in FunctionLoweringInfo::set). RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), None); + V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { // FIXME: We could potentially support variadic dbg_values here. 
if (IsVariadic) @@ -1429,7 +1490,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, if (!FragmentExpr) continue; SDDbgValue *SDV = DAG.getVRegDbgValue( - Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder); + Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder); DAG.AddDbgValue(SDV, false); Offset += RegisterSize; } @@ -1446,9 +1507,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, // We have created a SDDbgOperand for each Value in Values. // Should use Order instead of SDNodeOrder? assert(!LocationOps.empty()); - SDDbgValue *SDV = - DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, - /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic); + SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, + /*IsIndirect=*/false, DbgLoc, + SDNodeOrder, IsVariadic); DAG.AddDbgValue(SDV, /*isParameter=*/false); return true; } @@ -1472,7 +1533,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty, - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); @@ -1647,12 +1708,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); - if (isa<ScalableVectorType>(VecTy)) - return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); - - SmallVector<SDValue, 16> Ops; - Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op); - return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + return NodeMap[V] = DAG.getSplat(VT, getCurSDLoc(), Op); } llvm_unreachable("Unknown vector constant"); @@ -1664,16 +1720,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, - TLI.getFrameIndexTy(DAG.getDataLayout())); + return DAG.getFrameIndex( + SI->second, TLI.getValueType(DAG.getDataLayout(), AI->getType())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { - unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + Register InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), None); + Inst->getType(), std::nullopt); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -2082,7 +2138,7 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // Already exported? if (FuncInfo.isExportedInst(V)) return; - unsigned Reg = FuncInfo.InitializeRegForValue(V); + Register Reg = FuncInfo.InitializeRegForValue(V); CopyValueToVirtualRegister(V, Reg); } @@ -2536,6 +2592,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); + setValue(CurInst, BrCond); + // Insert the false branch. Do this even if it's a fall through branch, // this makes it easier to do DAG optimizations which require inverting // the branch condition. 
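Several hunks above (getValueImpl and, further down, visitGetElementPtr) collapse the separate fixed-length and scalable code paths into a single DAG.getSplat call. The underlying operation is just scalar broadcast: a fixed-length splat can be spelled out lane by lane as a BUILD_VECTOR of identical operands, whereas a scalable vector's lane count is unknown at compile time, so a single splat node must carry the scalar instead. A minimal fixed-length sketch:

#include <cassert>
#include <cstddef>
#include <vector>

// Fixed-length splat: materialize NumElts copies of the scalar, which is what
// a BUILD_VECTOR of identical operands amounts to. For scalable vectors the
// element count is not a compile-time constant, so no such operand list can be
// built and a single SPLAT_VECTOR-style node carries the scalar instead.
template <typename T>
std::vector<T> splat(const T &Scalar, std::size_t NumElts) {
  return std::vector<T>(NumElts, Scalar);
}

int main() {
  auto V = splat(42, 4); // conceptually <4 x i32> <42, 42, 42, 42>
  assert(V.size() == 4 && V[0] == 42 && V[3] == 42);
}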
@@ -2746,7 +2804,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, CallOptions, getCurSDLoc()).second; + std::nullopt, CallOptions, getCurSDLoc()) + .second; // On PS4/PS5, the "return address" must still be within the calling // function, even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2835,7 +2894,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MVT VT = BB.RegVT; SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; - unsigned PopCount = countPopulation(B.Mask); + unsigned PopCount = llvm::popcount(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it @@ -3000,7 +3059,8 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { BasicBlock *Dest = I.getIndirectDest(i); MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); - Target->setHasAddressTaken(); + Target->setMachineBlockAddressTaken(); + Target->setLabelMustBeEmitted(); // Don't add duplicate machine successors. if (Dests.insert(Dest).second) addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); @@ -3279,7 +3339,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Flags.copyFMF(*FPOp); // Min/max matching is only viable if all output VTs are the same. - if (is_splat(ValueVTs)) { + if (all_equal(ValueVTs)) { EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); @@ -3339,7 +3399,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { break; case SPF_NABS: Negate = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case SPF_ABS: IsUnaryAbs = true; Opc = ISD::ABS; @@ -3375,8 +3435,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Values[i] = DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i)); if (Negate) - Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), - Values[i]); + Values[i] = DAG.getNegative(Values[i], dl, VT); } } else { for (unsigned i = 0; i != NumValues; ++i) { @@ -3537,7 +3596,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3547,7 +3606,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3716,7 +3775,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } // Calculate new mask. 
- SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end()); + SmallVector<int, 8> MappedOps(Mask); for (int &Idx : MappedOps) { if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; @@ -3856,10 +3915,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); - if (VectorElementCount.isScalable()) - N = DAG.getSplatVector(VT, dl, N); - else - N = DAG.getSplatBuildVector(VT, dl, N); + N = DAG.getSplat(VT, dl, N); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); @@ -3891,7 +3947,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType()); // We intentionally mask away the high bits here; ElementSize may not // fit in IdxTy. - APInt ElementMul(IdxSize, ElementSize.getKnownMinSize()); + APInt ElementMul(IdxSize, ElementSize.getKnownMinValue()); bool ElementScalable = ElementSize.isScalable(); // If this is a scalar constant or a splat vector of constants, @@ -3931,10 +3987,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); - if (VectorElementCount.isScalable()) - IdxN = DAG.getSplatVector(VT, dl, IdxN); - else - IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); + IdxN = DAG.getSplat(VT, dl, IdxN); } // If the index is smaller or larger than intptr_t, truncate or extend @@ -4000,7 +4053,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -4019,7 +4072,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); if (*Alignment <= StackAlign) - Alignment = None; + Alignment = std::nullopt; const uint64_t StackAlignMask = StackAlign.value() - 1U; // Round the size of the allocation up to the stack alignment size @@ -4068,11 +4121,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Ptr = getValue(SV); Type *Ty = I.getType(); - Align Alignment = I.getAlign(); - - AAMDNodes AAInfo = I.getAAMetadata(); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - SmallVector<EVT, 4> ValueVTs, MemVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); @@ -4080,9 +4128,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (NumValues == 0) return; + Align Alignment = I.getAlign(); + AAMDNodes AAInfo = I.getAAMetadata(); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); bool isVolatile = I.isVolatile(); MachineMemOperand::Flags MMOFlags = - TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); SDValue Root; bool ConstantMemory = false; @@ -4100,11 +4151,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getEntryNode(); ConstantMemory = true; MMOFlags |= MachineMemOperand::MOInvariant; - - // FIXME: pointsToConstantMemory probably does not imply dereferenceable, - // but the previous usage implied it did. Probably should check - // isDereferenceableAndAlignedPointer. - MMOFlags |= MachineMemOperand::MODereferenceable; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); @@ -4135,7 +4181,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -4157,7 +4203,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else @@ -4278,7 +4324,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { // See visitLoad comments. if (ChainI == MaxParallelChains) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -4294,7 +4340,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); + setValue(&I, StoreNode); DAG.setRoot(StoreNode); } @@ -4316,7 +4363,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = None; + Alignment = std::nullopt; }; Value *PtrOperand, *MaskOperand, *Src0Operand; @@ -4400,17 +4447,17 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) return false; + uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); + + // Target may not support the required addressing mode. 
+ if (ScaleVal != 1 && + !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize)) + return false; + Base = SDB->getValue(BasePtr); Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - // MGATHER/MSCATTER are only required to support scaling by one or by the - // element size. Other scales may be produced using target-specific DAG - // combines. - uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); - if (ScaleVal != ElemSize && ScaleVal != 1) - return false; - Scale = DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; @@ -4478,7 +4525,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { MaybeAlign &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = None; + Alignment = std::nullopt; Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; @@ -4624,6 +4671,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; + case AtomicRMWInst::UIncWrap: + NT = ISD::ATOMIC_LOAD_UINC_WRAP; + break; + case AtomicRMWInst::UDecWrap: + NT = ISD::ATOMIC_LOAD_UDEC_WRAP; + break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); @@ -4659,7 +4712,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { TLI.getFenceOperandTy(DAG.getDataLayout())); Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); - DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); + SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); + setValue(&I, N); + DAG.setRoot(N); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { @@ -4677,7 +4732,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); - auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), @@ -4726,7 +4781,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlign().value() < MemVT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4745,13 +4801,14 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { // TODO: Once this is better exercised by tests, it should be merged with // the normal path for stores to prevent future divergence. SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + setValue(&I, S); DAG.setRoot(S); return; } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Ptr, Val, MMO); - + setValue(&I, OutChain); DAG.setRoot(OutChain); } @@ -4826,13 +4883,21 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; + // In some cases, custom collection of operands from CallInst I may be needed. 
+ TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, - I.getAAMetadata()); + // + // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic + // didn't yield anything useful. + MachinePointerInfo MPI; + if (Info.ptrVal) + MPI = MachinePointerInfo(Info.ptrVal, Info.offset); + else if (Info.fallbackAddressSpace) + MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, + Info.memVT, MPI, Info.align, Info.flags, + Info.size, I.getAAMetadata()); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -5515,17 +5580,20 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF // pointing at the VReg, which will be patched up later. auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF); - auto MIB = BuildMI(MF, DL, Inst); - MIB.addReg(Reg); - MIB.addImm(0); - MIB.addMetadata(Variable); + SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg( + /* Reg */ Reg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)}); + auto *NewDIExpr = FragExpr; // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into // the DIExpression. if (Indirect) NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore); - MIB.addMetadata(NewDIExpr); - return MIB; + SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0}); + NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops); + return BuildMI(MF, DL, Inst, false, MOs, Variable, NewDIExpr); } else { // Create a completely standard DBG_VALUE. auto &Inst = TII->get(TargetOpcode::DBG_VALUE); @@ -5599,7 +5667,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } bool IsIndirect = false; - Optional<MachineOperand> Op; + std::optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); if (FI != std::numeric_limits<int>::max()) @@ -5680,7 +5748,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, - V->getType(), None); + V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; @@ -6026,6 +6094,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { + // Debug intrinsics are handled seperately in assignment tracking mode. + if (isAssignmentTrackingEnabled(*I.getFunction()->getParent())) + return; // Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e. // they are non-variadic. const auto &DI = cast<DbgVariableIntrinsic>(I); @@ -6125,7 +6196,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.AddDbgLabel(SDV); return; } + case Intrinsic::dbg_assign: { + // Debug intrinsics are handled seperately in assignment tracking mode. 
+ assert(isAssignmentTrackingEnabled(*I.getFunction()->getParent()) && + "expected assignment tracking to be enabled"); + return; + } case Intrinsic::dbg_value: { + // Debug intrinsics are handled seperately in assignment tracking mode. + if (isAssignmentTrackingEnabled(*I.getFunction()->getParent())) + return; const DbgValueInst &DI = cast<DbgValueInst>(I); assert(DI.getVariable() && "Missing variable"); @@ -6140,9 +6220,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; bool IsVariadic = DI.hasArgList(); - if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(), + if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(), SDNodeOrder, IsVariadic)) - addDanglingDebugInfo(&DI, dl, SDNodeOrder); + addDanglingDebugInfo(&DI, SDNodeOrder); return; } @@ -6358,7 +6438,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Get the last argument, the metadata and convert it to an integer in the // call Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata(); - Optional<RoundingMode> RoundMode = + std::optional<RoundingMode> RoundMode = convertStrToRoundingMode(cast<MDString>(MD)->getString()); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -6750,8 +6830,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - case Intrinsic::flt_rounds: - Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot()); + case Intrinsic::get_rounding: + Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot()); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; @@ -6846,7 +6926,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Result = DAG.getMemIntrinsicNode( ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), - /* align */ None, Flags); + /* align */ std::nullopt, Flags); // Chain the prefetch in parallell with any pending loads, to stay out of // the way of later optimizations. 
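For readers unfamiliar with the two AtomicRMW operations that gain ISD nodes in the hunk above (ATOMIC_LOAD_UINC_WRAP / ATOMIC_LOAD_UDEC_WRAP): they wrap against a caller-supplied bound rather than modulo 2^n. A plain, non-atomic C++ sketch of the value computation, following the LangRef wording for atomicrmw uinc_wrap and udec_wrap; the helper names are illustrative:

#include <cstdint>

// uinc_wrap: increment the old value, wrapping to 0 once it reaches the bound.
static uint32_t uincWrap(uint32_t Old, uint32_t Bound) {
  return Old >= Bound ? 0 : Old + 1;
}

// udec_wrap: decrement the old value, resetting to the bound when the old
// value is 0 or already above the bound.
static uint32_t udecWrap(uint32_t Old, uint32_t Bound) {
  return (Old == 0 || Old > Bound) ? Bound : Old - 1;
}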
@@ -7178,6 +7258,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } + case Intrinsic::threadlocal_address: { + setValue(&I, getValue(I.getOperand(0))); + return; + } case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); @@ -7191,14 +7275,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue TripCount = getValue(I.getOperand(1)); auto VecTy = CCVT.changeVectorElementType(ElementVT); - SDValue VectorIndex, VectorTripCount; - if (VecTy.isScalableVector()) { - VectorIndex = DAG.getSplatVector(VecTy, sdl, Index); - VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount); - } else { - VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index); - VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount); - } + SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index); + SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount); SDValue VectorStep = DAG.getStepVector(sdl, VecTy); SDValue VectorInduction = DAG.getNode( ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep); @@ -7253,11 +7331,6 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain - // We do not need to serialize constrained FP intrinsics against // each other or against (nonvolatile) loads, so they can be // chained like loads. @@ -7286,7 +7359,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( // The only reason why ebIgnore nodes still need to be chained is that // they might depend on the current rounding mode, and therefore must // not be moved across instruction that may change that mode. - LLVM_FALLTHROUGH; + [[fallthrough]]; case fp::ExceptionBehavior::ebMayTrap: // These must not be moved across calls or instructions that may change // floating-point exception masks. @@ -7301,7 +7374,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } }; - SDVTList VTs = DAG.getVTList(ValueVTs); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType()); + SDVTList VTs = DAG.getVTList(VT, MVT::Other); fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; @@ -7323,8 +7398,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( Opcode = ISD::STRICT_FMA; // Break fmuladd into fmul and fadd. if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict || - !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), - ValueVTs[0])) { + !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { Opers.pop_back(); SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); pushOutChain(Mul, EB); @@ -7365,8 +7439,18 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { - Optional<unsigned> ResOPC; + std::optional<unsigned> ResOPC; switch (VPIntrin.getIntrinsicID()) { + case Intrinsic::vp_ctlz: { + bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne(); + ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ; + break; + } + case Intrinsic::vp_cttz: { + bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne(); + ResOPC = IsZeroUndef ? 
ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ; + break; + } #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ case Intrinsic::VPID: \ ResOPC = ISD::VPSD; \ @@ -7388,118 +7472,133 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { return *ResOPC; } -void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, - SmallVector<SDValue, 7> &OpValues, - bool IsGather) { +void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues) { SDLoc DL = getCurSDLoc(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); SDValue LD; bool AddToChain = true; - if (!IsGather) { - // Do not serialize variable-length loads of constant memory with - // anything. - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); - MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); - AddToChain = !AA || !AA->pointsToConstantMemory(ML); - SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); - LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], - MMO, false /*IsExpanding */); - } else { - if (!Alignment) - Alignment = DAG.getEVTAlign(VT.getScalarType()); - unsigned AS = - PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); - SDValue Base, Index, Scale; - ISD::MemIndexType IndexType; - bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent(), - VT.getScalarStoreSize()); - if (!UniformBase) { - Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); - Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_SCALED; - Scale = - DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); - } - EVT IdxVT = Index.getValueType(); - EVT EltTy = IdxVT.getVectorElementType(); - if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { - EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); - Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); - } - LD = DAG.getGatherVP( - DAG.getVTList(VT, MVT::Other), VT, DL, - {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, - IndexType); - } + // Do not serialize variable-length loads of constant memory with + // anything. + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); + AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], + MMO, false /*IsExpanding */); if (AddToChain) PendingLoads.push_back(LD.getValue(1)); setValue(&VPIntrin, LD); } -void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, - SmallVector<SDValue, 7> &OpValues, - bool IsScatter) { +void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues) { SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + SDValue LD; + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + } + LD = DAG.getGatherVP( + DAG.getVTList(VT, MVT::Other), VT, DL, + {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, + IndexType); + PendingLoads.push_back(LD.getValue(1)); + setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues) { + SDLoc DL = getCurSDLoc(); Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); SDValue ST; - if (!IsScatter) { - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); - SDValue Ptr = OpValues[1]; - SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); - ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, - OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, - /* IsTruncating */ false, /*IsCompressing*/ false); - } else { - if (!Alignment) - Alignment = DAG.getEVTAlign(VT.getScalarType()); - unsigned AS = - PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); - SDValue Base, Index, Scale; - ISD::MemIndexType IndexType; - bool UniformBase = getUniformBase(PtrOperand, Base, 
Index, IndexType, Scale, - this, VPIntrin.getParent(), - VT.getScalarStoreSize()); - if (!UniformBase) { - Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); - Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_SCALED; - Scale = - DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); - } - EVT IdxVT = Index.getValueType(); - EVT EltTy = IdxVT.getVectorElementType(); - if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { - EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); - Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); - } - ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, - {getMemoryRoot(), OpValues[0], Base, Index, Scale, - OpValues[2], OpValues[3]}, - MMO, IndexType); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + SDValue Ptr = OpValues[1]; + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, + OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, + /* IsTruncating */ false, /*IsCompressing*/ false); + DAG.setRoot(ST); + setValue(&VPIntrin, ST); +} + +void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues) { + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(1); + EVT VT = OpValues[0].getValueType(); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + SDValue ST; + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_SCALED; + Scale = + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); } + ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, + {getMemoryRoot(), OpValues[0], Base, Index, Scale, + OpValues[2], OpValues[3]}, + MMO, IndexType); DAG.setRoot(ST); setValue(&VPIntrin, ST); } @@ -7626,20 +7725,78 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( break; } case ISD::VP_LOAD: + visitVPLoad(VPIntrin, ValueVTs[0], OpValues); + break; case ISD::VP_GATHER: - visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, - Opcode == ISD::VP_GATHER); + visitVPGather(VPIntrin, ValueVTs[0], OpValues); break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues); break; case ISD::VP_STORE: + visitVPStore(VPIntrin, OpValues); + break; case ISD::VP_SCATTER: - visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); + visitVPScatter(VPIntrin, OpValues); break; case 
ISD::EXPERIMENTAL_VP_STRIDED_STORE: visitVPStridedStore(VPIntrin, OpValues); break; + case ISD::VP_FMULADD: { + assert(OpValues.size() == 5 && "Unexpected number of operands"); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin)) + SDFlags.copyFMF(*FPMO); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) { + setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags)); + } else { + SDValue Mul = DAG.getNode( + ISD::VP_FMUL, DL, VTs, + {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags); + SDValue Add = + DAG.getNode(ISD::VP_FADD, DL, VTs, + {Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags); + setValue(&VPIntrin, Add); + } + break; + } + case ISD::VP_INTTOPTR: { + SDValue N = OpValues[0]; + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType()); + EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getType()); + N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1], + OpValues[2]); + N = DAG.getVPZExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1], + OpValues[2]); + setValue(&VPIntrin, N); + break; + } + case ISD::VP_PTRTOINT: { + SDValue N = OpValues[0]; + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + VPIntrin.getType()); + EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), + VPIntrin.getOperand(0)->getType()); + N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1], + OpValues[2]); + N = DAG.getVPZExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1], + OpValues[2]); + setValue(&VPIntrin, N); + break; + } + case ISD::VP_ABS: + case ISD::VP_CTLZ: + case ISD::VP_CTLZ_ZERO_UNDEF: + case ISD::VP_CTTZ: + case ISD::VP_CTTZ_ZERO_UNDEF: { + // Pop is_zero_poison operand for cp.ctlz/cttz or + // is_int_min_poison operand for vp.abs. + OpValues.pop_back(); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPIntrin, Result); + break; + } } } @@ -7820,6 +7977,17 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, if (TLI.supportSwiftError() && SwiftErrorVal) isTailCall = false; + ConstantInt *CFIType = nullptr; + if (CB.isIndirectCall()) { + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi)) { + if (!TLI.supportKCFIBundles()) + report_fatal_error( + "Target doesn't support calls with kcfi operand bundles."); + CFIType = cast<ConstantInt>(Bundle->Inputs[0]); + assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type"); + } + } + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -7827,7 +7995,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, .setTailCall(isTailCall) .setConvergent(CB.isConvergent()) .setIsPreallocated( - CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); + CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) + .setCFIType(CFIType); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { @@ -8200,9 +8369,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - if (Function *F = I.getCalledFunction()) { - diagnoseDontCall(I); + diagnoseDontCall(I); + if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic? 
unsigned IID = F->getIntrinsicID(); @@ -8371,7 +8540,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, - LLVMContext::OB_clang_arc_attachedcall}) && + LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledOperand()); @@ -8499,7 +8668,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// /// OpInfo describes the operand /// RefOpInfo describes the matching operand if any, the operand otherwise -static llvm::Optional<unsigned> +static std::optional<unsigned> getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo, SDISelAsmOperandInfo &RefOpInfo) { @@ -8513,7 +8682,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No work to do for memory/address operands. if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Address) - return None; + return std::nullopt; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. @@ -8523,7 +8692,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); // RC is unset only on failure. Return immediately. if (!RC) - return None; + return std::nullopt; // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to @@ -8568,7 +8737,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No need to allocate a matching input constraint since the constraint it's // matching to has already been allocated. if (OpInfo.isMatchingInputConstraint()) - return None; + return std::nullopt; EVT ValueVT = OpInfo.ConstraintVT; if (OpInfo.ConstraintVT == MVT::Other) @@ -8606,7 +8775,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - return None; + return std::nullopt; } static unsigned @@ -8665,6 +8834,23 @@ public: } // end anonymous namespace +static bool isFunction(SDValue Op) { + if (Op && Op.getOpcode() == ISD::GlobalAddress) { + if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + auto Fn = dyn_cast_or_null<Function>(GA->getGlobal()); + + // In normal "call dllimport func" instruction (non-inlineasm) it force + // indirect access by specifing call opcode. And usually specially print + // asm with indirect symbol (i.g: "*") according to opcode. Inline asm can + // not do in this way now. (In fact, this is similar with "Data Access" + // action). So here we ignore dllimport function. + if (Fn && !Fn->hasDLLImportStorageClass()) + return true; + } + } + return false; +} + /// visitInlineAsm - Handle a call to an InlineAsm object. void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, const BasicBlock *EHPadBB) { @@ -8713,7 +8899,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // memory and is nonvolatile. SDValue Flag, Chain = (HasSideEffect) ? 
getRoot() : DAG.getRoot(); - bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow(); + bool EmitEHLabels = isa<InvokeInst>(Call); if (EmitEHLabels) { assert(EHPadBB && "InvokeInst must have an EHPadBB"); } @@ -8731,8 +8917,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, Chain = lowerStartEH(Chain, EHPadBB, BeginLabel); } + int OpNo = -1; + SmallVector<StringRef> AsmStrs; + IA->collectAsmStrs(AsmStrs); + // Second pass over the constraints: compute which constraint option to use. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { + if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput) + OpNo++; + // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an @@ -8750,6 +8943,32 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.ConstraintType == TargetLowering::C_Address) continue; + // In Linux PIC model, there are 4 cases about value/label addressing: + // + // 1: Function call or Label jmp inside the module. + // 2: Data access (such as global variable, static variable) inside module. + // 3: Function call or Label jmp outside the module. + // 4: Data access (such as global variable) outside the module. + // + // Due to current llvm inline asm architecture designed to not "recognize" + // the asm code, there are quite troubles for us to treat mem addressing + // differently for same value/adress used in different instuctions. + // For example, in pic model, call a func may in plt way or direclty + // pc-related, but lea/mov a function adress may use got. + // + // Here we try to "recognize" function call for the case 1 and case 3 in + // inline asm. And try to adjust the constraint for them. + // + // TODO: Due to current inline asm didn't encourage to jmp to the outsider + // label, so here we don't handle jmp function label now, but we need to + // enhance it (especilly in PIC model) if we meet meaningful requirements. + if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) && + TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) && + TM.getCodeModel() != CodeModel::Large) { + OpInfo.isIndirect = false; + OpInfo.ConstraintType = TargetLowering::C_Address; + } + // If this is a memory input, and if the operand is not indirect, do what we // need to provide an address for the memory input. 
if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -8800,7 +9019,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const char *RegName = TRI.getName(RegError.value()); + const char *RegName = TRI.getName(*RegError); emitInlineAsmError(Call, "register '" + Twine(RegName) + "' allocated for constraint '" + Twine(OpInfo.ConstraintCode) + @@ -8959,8 +9178,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - OpInfo.ConstraintType == TargetLowering::C_Address) { + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert((OpInfo.isIndirect || OpInfo.ConstraintType != TargetLowering::C_Memory) && "Operand must be indirect to be a mem!"); @@ -8983,6 +9201,37 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } + if (OpInfo.ConstraintType == TargetLowering::C_Address) { + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && + "Address operands expect pointer values"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + + SDValue AsmOp = InOperandVal; + if (isFunction(InOperandVal)) { + auto *GA = cast<GlobalAddressSDNode>(InOperandVal); + ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1); + AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(), + InOperandVal.getValueType(), + GA->getOffset()); + } + + // Add information to the INLINEASM node to know about this input. + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + + AsmNodeOperands.push_back( + DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); + + AsmNodeOperands.push_back(AsmOp); + break; + } + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); @@ -9047,7 +9296,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (StructType *StructResult = dyn_cast<StructType>(CallResultType)) ResultTypes = StructResult->elements(); else if (!CallResultType->isVoidTy()) - ResultTypes = makeArrayRef(CallResultType); + ResultTypes = ArrayRef(CallResultType); auto CurResultType = ResultTypes.begin(); auto handleRegAssign = [&](SDValue V) { @@ -9327,12 +9576,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Chain, InFlag, Callee, NullPtr; + SDValue Chain, InFlag, Callee; SmallVector<SDValue, 32> Ops; SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledOperand()); - NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguments // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -9375,7 +9623,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops); InFlag = Chain.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); + Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL); // Stackmaps don't generate values, so nothing goes into the NodeMap. 
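A large share of the churn in this file is the mechanical migration from llvm::Optional/None to std::optional/std::nullopt, visible above in getRegistersForValue now returning std::optional<unsigned>. A minimal standalone sketch of the pattern; pickMatchedReg and its parameters are invented for illustration:

#include <optional>

// Previously spelled llvm::Optional<unsigned> with `return None;`.
static std::optional<unsigned> pickMatchedReg(bool HasMatch, unsigned Reg) {
  if (!HasMatch)
    return std::nullopt; // nothing allocated; callers test has_value()
  return Reg;
}

Call sites need no change beyond the spelling, since std::optional provides the same has_value()/value()/operator* surface the surrounding code already uses.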
@@ -9898,7 +10146,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ISD::OutputArg MyFlags( Flags, Parts[j].getValueType().getSimpleVT(), VT, i < CLI.NumFixedArgs, i, - j * Parts[j].getValueType().getStoreSize().getKnownMinSize()); + j * Parts[j].getValueType().getStoreSize().getKnownMinValue()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9986,7 +10234,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. - Optional<ISD::NodeType> AssertOp; + std::optional<ISD::NodeType> AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) @@ -10064,7 +10312,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, // notional registers required by the type. RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); if (ExtendType == ISD::ANY_EXTEND) { @@ -10425,8 +10673,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // For scalable vectors, use the minimum size; individual targets // are responsible for handling scalable vector arguments and // return values. - ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize()); + ISD::InputArg MyFlags( + Flags, RegisterVT, VT, isArgValueUsed, ArgNo, + PartBase + i * RegisterVT.getStoreSize().getKnownMinValue()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -10439,7 +10688,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); - PartBase += VT.getStoreSize().getKnownMinSize(); + PartBase += VT.getStoreSize().getKnownMinValue(); } } @@ -10477,7 +10726,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - Optional<ISD::NodeType> AssertOp = None; + std::optional<ISD::NodeType> AssertOp; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, F.getCallingConv(), AssertOp); @@ -10539,7 +10788,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // we do generate a copy for it that can be used on return from the // function. 
if (ArgHasUses || isSwiftErrorArg) { - Optional<ISD::NodeType> AssertOp; + std::optional<ISD::NodeType> AssertOp; if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; else if (Arg.hasAttribute(Attribute::ZExt)) @@ -10562,7 +10811,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); - SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), + SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(&Arg, Res); @@ -10645,14 +10894,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const Instruction *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; // Check PHI nodes in successors that expect a value to be available from this // block. - for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - const BasicBlock *SuccBB = TI->getSuccessor(succ); + for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) { if (!isa<PHINode>(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; @@ -10678,7 +10925,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned Reg; const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); - if (const Constant *C = dyn_cast<Constant>(PHIOp)) { + if (const auto *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C); @@ -10709,10 +10956,9 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); - for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); - for (unsigned i = 0, e = NumRegisters; i != e; ++i) + for (EVT VT : ValueVTs) { + const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + for (unsigned i = 0; i != NumRegisters; ++i) FuncInfo.PHINodesToUpdate.push_back( std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d1915fd4e7ae..bf2111013461 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -33,6 +34,7 @@ #include <algorithm> #include <cassert> #include <cstdint> +#include <optional> #include <utility> #include <vector> @@ -42,6 +44,7 @@ class AAResults; class AllocaInst; class AtomicCmpXchgInst; class AtomicRMWInst; +class AssumptionCache; class BasicBlock; class BranchInst; class CallInst; @@ -103,20 +106,68 @@ class SelectionDAGBuilder { /// Helper type for DanglingDebugInfoMap. 
class DanglingDebugInfo { - const DbgValueInst* DI = nullptr; - DebugLoc dl; + using DbgValTy = const DbgValueInst *; + using VarLocTy = const VarLocInfo *; + PointerUnion<DbgValTy, VarLocTy> Info; unsigned SDNodeOrder = 0; public: DanglingDebugInfo() = default; - DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) - : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {} - - const DbgValueInst* getDI() { return DI; } - DebugLoc getdl() { return dl; } - unsigned getSDNodeOrder() { return SDNodeOrder; } + DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO) + : Info(DI), SDNodeOrder(SDNO) {} + DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO) + : Info(VarLoc), SDNodeOrder(SDNO) {} + + DILocalVariable *getVariable(const FunctionVarLocs *Locs) const { + if (Info.is<VarLocTy>()) + return Locs->getDILocalVariable(Info.get<VarLocTy>()->VariableID); + return Info.get<DbgValTy>()->getVariable(); + } + DIExpression *getExpression() const { + if (Info.is<VarLocTy>()) + return Info.get<VarLocTy>()->Expr; + return Info.get<DbgValTy>()->getExpression(); + } + Value *getVariableLocationOp(unsigned Idx) const { + assert(Idx == 0 && "Dangling variadic debug values not supported yet"); + if (Info.is<VarLocTy>()) + return Info.get<VarLocTy>()->V; + return Info.get<DbgValTy>()->getVariableLocationOp(Idx); + } + DebugLoc getDebugLoc() const { + if (Info.is<VarLocTy>()) + return Info.get<VarLocTy>()->DL; + return Info.get<DbgValTy>()->getDebugLoc(); + } + unsigned getSDNodeOrder() const { return SDNodeOrder; } + + /// Helper for printing DanglingDebugInfo. This hoop-jumping is to + /// accommodate the fact that an argument is required for getVariable. + /// Call SelectionDAGBuilder::printDDI instead of using directly. + struct Print { + Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs) + : DDI(DDI), VarLocs(VarLocs) {} + const DanglingDebugInfo &DDI; + const FunctionVarLocs *VarLocs; + friend raw_ostream &operator<<(raw_ostream &OS, + const DanglingDebugInfo::Print &P) { + OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs) + << ", val= " << *P.DDI.getVariableLocationOp(0) + << ", expr=" << *P.DDI.getExpression() + << ", order=" << P.DDI.getSDNodeOrder() + << ", loc=" << P.DDI.getDebugLoc() << ")"; + return OS; + } + }; }; + /// Returns an object that defines `raw_ostream &operator<<` for printing. + /// Usage example: + //// errs() << printDDI(MyDanglingInfo) << " is dangling\n"; + DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) { + return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs()); + } + /// Helper type for DanglingDebugInfoMap. typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector; @@ -191,6 +242,7 @@ public: SelectionDAG &DAG; AAResults *AA = nullptr; + AssumptionCache *AC = nullptr; const TargetLibraryInfo *LibInfo; class SDAGSwitchLowering : public SwitchCG::SwitchLowering { @@ -244,7 +296,7 @@ public: SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} - void init(GCFunctionInfo *gfi, AAResults *AA, + void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare @@ -296,8 +348,8 @@ public: SDValue getCopyFromRegs(const Value *V, Type *Ty); /// Register a dbg_value which relies on a Value which we have not yet seen. 
- void addDanglingDebugInfo(const DbgValueInst *DI, DebugLoc DL, - unsigned Order); + void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order); + void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order); /// If we have dangling debug info that describes \p Variable, or an /// overlapping part of variable considering the \p Expr, then this method @@ -317,8 +369,8 @@ public: /// For a given list of Values, attempt to create and record a SDDbgValue in /// the SelectionDAG. bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var, - DIExpression *Expr, DebugLoc CurDL, DebugLoc InstDL, - unsigned Order, bool IsVariadic); + DIExpression *Expr, DebugLoc DbgLoc, unsigned Order, + bool IsVariadic); /// Evict any dangling debug information, attempting to salvage it first. void resolveOrClearDbgInfo(); @@ -567,10 +619,14 @@ private: void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); - void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, - SmallVector<SDValue, 7> &OpValues, bool IsGather); - void visitVPStoreScatter(const VPIntrinsic &VPIntrin, - SmallVector<SDValue, 7> &OpValues, bool IsScatter); + void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues); + void visitVPStore(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues); + void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues); + void visitVPScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues); void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues); void visitVPStridedStore(const VPIntrinsic &VPIntrin, @@ -680,14 +736,14 @@ struct RegsForValue { /// Records if this value needs to be treated in an ABI dependant manner, /// different to normal type legalization. 
- Optional<CallingConv::ID> CallConv; + std::optional<CallingConv::ID> CallConv; RegsForValue() = default; RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, - Optional<CallingConv::ID> CC = None); + std::optional<CallingConv::ID> CC = std::nullopt); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - Optional<CallingConv::ID> CC); + std::optional<CallingConv::ID> CC); bool isABIMangled() const { return CallConv.has_value(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6ba01664e756..fe4261291fc5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -79,6 +79,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; @@ -95,6 +96,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; + case ISD::ATOMIC_LOAD_UINC_WRAP: + return "AtomicLoadUIncWrap"; + case ISD::ATOMIC_LOAD_UDEC_WRAP: + return "AtomicLoadUDecWrap"; case ISD::ATOMIC_LOAD: return "AtomicLoad"; case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; @@ -422,7 +427,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "call_alloc"; // Floating point environment manipulation - case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::GET_ROUNDING: return "get_rounding"; case ISD::SET_ROUNDING: return "set_rounding"; // Bit manipulation @@ -1059,6 +1064,9 @@ LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const { void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { printr(OS, G); + // Under VerboseDAGDumping divergence will be printed always. 
+ if (isDivergent() && !VerboseDAGDumping) + OS << " # D:1"; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; printOperand(OS, G, getOperand(i)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index d46a0a23cca3..902f46115557 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" @@ -31,6 +32,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -61,6 +63,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" @@ -101,6 +104,7 @@ #include <iterator> #include <limits> #include <memory> +#include <optional> #include <string> #include <utility> #include <vector> @@ -309,7 +313,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) +SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm, + CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), SwiftError(new SwiftErrorValueTracking()), CurDAG(new SelectionDAG(tm, OL)), @@ -336,9 +341,14 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); + // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for + // the module. + AU.addRequired<AssignmentTrackingAnalysis>(); + AU.addPreserved<AssignmentTrackingAnalysis>(); if (OptLevel != CodeGenOpt::None) LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); @@ -382,8 +392,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Decide what flavour of variable location debug-info will be used, before // we change the optimisation level. - UseInstrRefDebugInfo = mf.useDebugInstrRef(); - CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo); + bool InstrRef = mf.shouldUseDebugInstrRef(); + mf.setUseDebugInstrRef(InstrRef); // Reset the target options before resetting the optimization // level below. @@ -403,15 +413,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction()); auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); + FunctionVarLocs const *FnVarLocs = nullptr; + if (isAssignmentTrackingEnabled(*Fn.getParent())) + FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults(); + LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI, + FnVarLocs); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); @@ -430,7 +446,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else AA = nullptr; - SDB->init(GFI, AA, LibInfo); + SDB->init(GFI, AA, AC, LibInfo); MF->setHasInlineAsm(false); @@ -488,7 +504,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) + if (From.isVirtual() && To.isVirtual()) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -530,15 +546,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. - bool InstrRef = MF->useDebugInstrRef(); for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1]; assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && "Function parameters should not be described by DBG_VALUE_LIST."); - bool hasFI = MI->getOperand(0).isFI(); + bool hasFI = MI->getDebugOperand(0).isFI(); Register Reg = - hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (Register::isPhysicalRegister(Reg)) + hasFI ? TRI.getFrameRegister(*MF) : MI->getDebugOperand(0).getReg(); + if (Reg.isPhysical()) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -567,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); if (IsIndirect) - assert(MI->getOperand(1).getImm() == 0 && + assert(MI->getDebugOffset().getImm() == 0 && "DBG_VALUE with nonzero offset"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -608,7 +623,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // For debug-info, in instruction referencing mode, we need to perform some // post-isel maintenence. - if (UseInstrRefDebugInfo) + if (MF->useDebugInstrRef()) MF->finalizeDebugInstrRefs(); // Determine if there are any calls in this machine function. @@ -997,6 +1012,15 @@ public: if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } + + /// NodeInserted - Handle new nodes inserted into the graph: propagate + /// metadata from root nodes that also applies to new nodes, in case the root + /// is later deleted. 
+ void NodeInserted(SDNode *N) override { + SDNode *CurNode = &*ISelPosition; + if (MDNode *MD = DAG.getPCSections(CurNode)) + DAG.addPCSections(N, MD); + } }; } // end anonymous namespace @@ -1073,7 +1097,7 @@ void SelectionDAGISel::DoInstructionSelection() { ++ISelPosition; // Make sure that ISelPosition gets properly updated when nodes are deleted - // in calls made from this function. + // in calls made from this function. New nodes inherit relevant metadata. ISelUpdater ISU(*CurDAG, ISelPosition); // The AllNodes list is now topological-sorted. Visit the @@ -1181,11 +1205,11 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB, // In case of single catch (...), we don't emit LSDA, so we don't need // this information. bool IsSingleCatchAllClause = - CPI->getNumArgOperands() == 1 && + CPI->arg_size() == 1 && cast<Constant>(CPI->getArgOperand(0))->isNullValue(); // cathchpads for longjmp use an empty type list, e.g. catchpad within %0 [] // and they don't need LSDA info - bool IsCatchLongjmp = CPI->getNumArgOperands() == 0; + bool IsCatchLongjmp = CPI->arg_size() == 0; if (!IsSingleCatchAllClause && !IsCatchLongjmp) { // Create a mapping from landing pad label to landing pad index. bool IntrFound = false; @@ -1279,56 +1303,75 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo.isExportedInst(I); // Exported instrs must be computed. } +static void processDbgDeclare(FunctionLoweringInfo &FuncInfo, + const Value *Address, DIExpression *Expr, + DILocalVariable *Var, DebugLoc DbgLoc) { + MachineFunction *MF = FuncInfo.MF; + const DataLayout &DL = MF->getDataLayout(); + + assert(Var && "Missing variable"); + assert(DbgLoc && "Missing location"); + + // Look through casts and constant offset GEPs. These mostly come from + // inalloca. + APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); + Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + // Check if the variable is a static alloca or a byval or inalloca + // argument passed in memory. If it is not, then we will ignore this + // intrinsic and handle this during isel like dbg.value. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = dyn_cast<AllocaInst>(Address)) { + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + FI = SI->second; + } else if (const auto *Arg = dyn_cast<Argument>(Address)) + FI = FuncInfo.getArgumentFrameIndex(Arg); + + if (FI == std::numeric_limits<int>::max()) + return; + + if (Offset.getBoolValue()) + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, + Offset.getZExtValue()); + + LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var + << ", Expr=" << *Expr << ", FI=" << FI + << ", DbgLoc=" << DbgLoc << "\n"); + MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc); +} + /// Collect llvm.dbg.declare information. This is done after argument lowering /// in case the declarations refer to arguments. 
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { - MachineFunction *MF = FuncInfo.MF; - const DataLayout &DL = MF->getDataLayout(); for (const BasicBlock &BB : *FuncInfo.Fn) { for (const Instruction &I : BB) { - const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); - if (!DI) - continue; - - assert(DI->getVariable() && "Missing variable"); - assert(DI->getDebugLoc() && "Missing location"); - const Value *Address = DI->getAddress(); - if (!Address) { - LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI - << " (bad address)\n"); - continue; + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) { + Value *Address = DI->getAddress(); + if (!Address) { + LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI + << " (bad address)\n"); + continue; + } + processDbgDeclare(FuncInfo, Address, DI->getExpression(), + DI->getVariable(), DI->getDebugLoc()); } - - // Look through casts and constant offset GEPs. These mostly come from - // inalloca. - APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); - Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - - // Check if the variable is a static alloca or a byval or inalloca - // argument passed in memory. If it is not, then we will ignore this - // intrinsic and handle this during isel like dbg.value. - int FI = std::numeric_limits<int>::max(); - if (const auto *AI = dyn_cast<AllocaInst>(Address)) { - auto SI = FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) - FI = SI->second; - } else if (const auto *Arg = dyn_cast<Argument>(Address)) - FI = FuncInfo.getArgumentFrameIndex(Arg); - - if (FI == std::numeric_limits<int>::max()) - continue; - - DIExpression *Expr = DI->getExpression(); - if (Offset.getBoolValue()) - Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, - Offset.getZExtValue()); - LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI - << ", " << *DI << "\n"); - MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); } } } +/// Collect single location variable information generated with assignment +/// tracking. This is done after argument lowering in case the declarations +/// refer to arguments. +static void processSingleLocVars(FunctionLoweringInfo &FuncInfo, + FunctionVarLocs const *FnVarLocs) { + for (auto It = FnVarLocs->single_locs_begin(), + End = FnVarLocs->single_locs_end(); + It != End; ++It) + processDbgDeclare(FuncInfo, It->V, It->Expr, + FnVarLocs->getDILocalVariable(It->VariableID), It->DL); +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. @@ -1336,8 +1379,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (TM.Options.EnableFastISel) { LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); - if (FastIS) - FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo); } ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -1391,7 +1432,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FastIS && Inserted) FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); - processDbgDeclares(*FuncInfo); + if (isAssignmentTrackingEnabled(*Fn.getParent())) { + assert(CurDAG->getFunctionVarLocs() && + "expected AssignmentTrackingAnalysis pass results"); + processSingleLocVars(*FuncInfo, CurDAG->getFunctionVarLocs()); + } else { + processDbgDeclares(*FuncInfo); + } // Iterate over all basic blocks in the function. 
StackProtector &SP = getAnalysis<StackProtector>(); @@ -1957,7 +2004,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if (!InlineAsm::isMemKind(Flags)) { + if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) { // Just skip over this operand, copying the operands verbatim. Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1986,7 +2033,9 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, // Add this to the output node. unsigned NewFlags = - InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + InlineAsm::isMemKind(Flags) + ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()) + : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size()); NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); llvm::append_range(Ops, SelOps); @@ -2193,6 +2242,11 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::MEMBARRIER, N->getValueType(0), + N->getOperand(0)); +} + void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue OpVal, SDLoc DL) { SDNode *OpNode = OpVal.getNode(); @@ -2249,7 +2303,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { // Cache arguments that will be moved to the end in the target node. SDValue Chain = *It++; - Optional<SDValue> Glue; + std::optional<SDValue> Glue; if (It->getValueType() == MVT::Glue) Glue = *It++; SDValue RegMask = *It++; @@ -2287,7 +2341,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { Ops.push_back(RegMask); Ops.push_back(Chain); if (Glue.has_value()) - Ops.push_back(Glue.value()); + Ops.push_back(*Glue); SDVTList NodeTys = N->getVTList(); CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops); @@ -2847,6 +2901,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::ARITH_FENCE: Select_ARITH_FENCE(NodeToMatch); return; + case ISD::MEMBARRIER: + Select_MEMBARRIER(NodeToMatch); + return; case ISD::STACKMAP: Select_STACKMAP(NodeToMatch); return; @@ -3764,5 +3821,3 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { } report_fatal_error(Twine(Msg.str())); } - -char SelectionDAGISel::ID = 0; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index c5c093ae228f..57bfe344dbab 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -14,8 +14,6 @@ #include "StatepointLowering.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" @@ -160,12 +158,12 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, /// Utility function for reservePreviousStackSlotForValue. Tries to find /// stack slot index to which we have spilled value for previous statepoints. /// LookUpDepth specifies maximum DFS depth this function is allowed to look. 
-static Optional<int> findPreviousSpillSlot(const Value *Val, - SelectionDAGBuilder &Builder, - int LookUpDepth) { +static std::optional<int> findPreviousSpillSlot(const Value *Val, + SelectionDAGBuilder &Builder, + int LookUpDepth) { // Can not look any further - give up now if (LookUpDepth <= 0) - return None; + return std::nullopt; // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { @@ -173,18 +171,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && "GetStatepoint must return one of two types"); if (isa<UndefValue>(Statepoint)) - return None; + return std::nullopt; const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps [cast<GCStatepointInst>(Statepoint)]; auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) - return None; + return std::nullopt; auto &Record = It->second; if (Record.type != RecordType::Spill) - return None; + return std::nullopt; return Record.payload.FI; } @@ -197,16 +195,16 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // All incoming values should have same known stack slot, otherwise result // is unknown. if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { - Optional<int> MergedResult = None; + std::optional<int> MergedResult; for (const auto &IncomingValue : Phi->incoming_values()) { - Optional<int> SpillSlot = + std::optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot) - return None; + return std::nullopt; if (MergedResult && *MergedResult != *SpillSlot) - return None; + return std::nullopt; MergedResult = SpillSlot; } @@ -241,7 +239,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // which we visit values is unspecified. // Don't know any information about this instruction - return None; + return std::nullopt; } /// Return true if-and-only-if the given SDValue can be lowered as either a @@ -284,7 +282,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, return; const int LookUpDepth = 6; - Optional<int> Index = + std::optional<int> Index = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); if (!Index) return; @@ -321,7 +319,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, /// reference lowered call result static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( SelectionDAGBuilder::StatepointLoweringInfo &SI, - SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { + SelectionDAGBuilder &Builder) { SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerInvokable(SI.CLI, SI.EHPadBB); @@ -526,34 +524,6 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will be: // deopt argument length, deopt arguments.., gc arguments... -#ifndef NDEBUG - if (auto *GFI = Builder.GFI) { - // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer (or vector - // of pointers) into the GC heap. This is basically just here to help catch - // errors during statepoint insertion. TODO: This should actually be in the - // Verifier, but we can't get to the GCStrategy from there (yet). 
- GCStrategy &S = GFI->getStrategy(); - for (const Value *V : SI.Bases) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt) { - assert(Opt.value() && - "non gc managed base pointer found in statepoint"); - } - } - for (const Value *V : SI.Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt) { - assert(Opt.value() && - "non gc managed derived pointer found in statepoint"); - } - } - assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); - } else { - assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); - assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); - } -#endif // Figure out what lowering strategy we're going to use for each part // Note: Is is conservatively correct to lower both "live-in" and "live-out" @@ -742,7 +712,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); - assert(SI.Bases.size() == SI.Ptrs.size()); + assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); + assert((GFI || SI.Bases.empty()) && + "No gc specified, so cannot relocate pointers!"); LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG @@ -770,8 +742,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Get call node, we will replace it later with statepoint SDValue ReturnVal; SDNode *CallNode; - std::tie(ReturnVal, CallNode) = - lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); + std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -921,7 +892,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( auto *RetTy = Relocate->getType(); Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy, None); + DAG.getDataLayout(), Reg, RetTy, std::nullopt); SDValue Chain = DAG.getRoot(); RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); @@ -1148,7 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, // TODO: To eliminate this problem we can remove gc.result intrinsics // completely and make statepoint call to return a tuple. Type *RetTy = GCResultLocality.second->getType(); - unsigned Reg = FuncInfo.CreateRegs(RetTy); + Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, I.getCallingConv()); @@ -1239,10 +1210,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); - - auto *Ty = Relocate.getType()->getScalarType(); - if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) - assert(*IsManaged && "Non gc managed pointer relocated!"); #endif const Value *DerivedPtr = Relocate.getDerivedPtr(); @@ -1266,7 +1233,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { Register InReg = Record.payload.Reg; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Relocate.getType(), - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. 
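Editor's aside on the StatepointLowering hunks above: they are part of the tree-wide migration from llvm::Optional/None to std::optional/std::nullopt, applied here to findPreviousSpillSlot and the RegsForValue calls. The stripped-down sketch below is illustration only, not LLVM code (mergeSpillSlots is an invented name); it shows the resulting idiom, including the "all PHI incoming values must agree on a spill slot, otherwise the result is unknown" merge described in the surrounding comments.

    #include <optional>
    #include <vector>

    // Illustration only: mirrors the std::optional idiom the diff migrates to.
    // A result stays "unknown" (std::nullopt) unless every input agrees.
    static std::optional<int>
    mergeSpillSlots(const std::vector<std::optional<int>> &Slots) {
      std::optional<int> Merged; // default-constructed == std::nullopt
      for (const std::optional<int> &Slot : Slots) {
        if (!Slot)
          return std::nullopt; // unknown input -> unknown result
        if (Merged && *Merged != *Slot)
          return std::nullopt; // conflicting slots -> give up
        Merged = Slot;
      }
      return Merged;
    }

    int main() {
      std::vector<std::optional<int>> Agreeing = {4, 4, 4};
      std::vector<std::optional<int>> Conflicting = {4, 7};
      bool OK = mergeSpillSlots(Agreeing) == std::optional<int>(4) &&
                mergeSpillSlots(Conflicting) == std::nullopt;
      return OK ? 0 : 1;
    }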
// We generate copy to/from regs even for local uses, hence we must // chain with current root to ensure proper ordering of copies w.r.t. // statepoint. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6205e74837c0..8d4c8802f71c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -260,7 +261,7 @@ bool TargetLowering::findOptimalMemOpLowering( // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; + unsigned Fast; if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && allowsMisalignedMemoryAccesses( VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), @@ -351,7 +352,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, break; case ISD::SETO: ShouldInvertCC = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : @@ -360,7 +361,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, case ISD::SETONE: // SETONE = O && UNE ShouldInvertCC = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : @@ -397,7 +398,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } - // Use the target specific return value for comparions lib calls. + // Use the target specific return value for comparison lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; TargetLowering::MakeLibCallOptions CallOptions; @@ -633,35 +634,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); - // TODO: We can probably do more work on calculating the known bits and - // simplifying the operations for scalable vectors, but for now we just - // bail out. - if (VT.isScalableVector()) { - // Pretend we don't know anything for now. - Known = KnownBits(DemandedBits.getBitWidth()); - return false; - } - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, AssumeSingleUse); } -// TODO: Can we merge SelectionDAG::GetDemandedBits into this? // TODO: Under what circumstances can we create nodes? Constant folding? SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const { EVT VT = Op.getValueType(); - // Pretend we don't know anything about scalable vectors for now. - // TODO: We can probably do more work on simplifying the operations for - // scalable vectors, but for now we just bail out. 
- if (VT.isScalableVector()) - return SDValue(); - // Limit search depth. if (Depth >= SelectionDAG::MaxRecursionDepth) return SDValue(); @@ -680,6 +668,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( KnownBits LHSKnown, RHSKnown; switch (Op.getOpcode()) { case ISD::BITCAST: { + if (VT.isScalableVector()) + return SDValue(); + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); @@ -825,6 +816,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: { + if (VT.isScalableVector()) + return SDValue(); + // If we only want the lowest element and none of extended bits, then we can // return the bitcasted source vector. SDValue Src = Op.getOperand(0); @@ -838,6 +832,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_VECTOR_ELT: { + if (VT.isScalableVector()) + return SDValue(); + // If we don't demand the inserted element, return the base vector. SDValue Vec = Op.getOperand(0); auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); @@ -848,6 +845,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_SUBVECTOR: { + if (VT.isScalableVector()) + return SDValue(); + SDValue Vec = Op.getOperand(0); SDValue Sub = Op.getOperand(1); uint64_t Idx = Op.getConstantOperandVal(2); @@ -857,6 +857,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( if (DemandedSubElts == 0) return Vec; // If this simply widens the lowest subvector, see if we can do it earlier. + // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating + // general nodes like this. if (Idx == 0 && Vec.isUndef()) { if (SDValue NewSub = SimplifyMultipleUseDemandedBits( Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1)) @@ -866,6 +868,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::VECTOR_SHUFFLE: { + assert(!VT.isScalableVector()); ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); // If all the demanded elts are from one operand and are inline, @@ -889,6 +892,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } default: + // TODO: Probably okay to remove after audit; here to reduce change size + // in initial enablement patch for scalable vectors + if (VT.isScalableVector()) + return SDValue(); + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( Op, DemandedBits, DemandedElts, DAG, Depth)) @@ -902,14 +910,10 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG, unsigned Depth) const { EVT VT = Op.getValueType(); - - // Pretend we don't know anything about scalable vectors for now. - // TODO: We can probably do more work on simplifying the operations for - // scalable vectors, but for now we just bail out. - if (VT.isScalableVector()) - return SDValue(); - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? 
APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, @@ -1068,16 +1072,10 @@ bool TargetLowering::SimplifyDemandedBits( // Don't know anything. Known = KnownBits(BitWidth); - // TODO: We can probably do more work on calculating the known bits and - // simplifying the operations for scalable vectors, but for now we just - // bail out. EVT VT = Op.getValueType(); - if (VT.isScalableVector()) - return false; - bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); - assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) && + assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) && "Unexpected vector size"); APInt DemandedBits = OriginalDemandedBits; @@ -1089,6 +1087,10 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.isUndef()) return false; + // We can't simplify target constants. + if (Op.getOpcode() == ISD::TargetConstant) + return false; + if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue()); @@ -1103,17 +1105,16 @@ bool TargetLowering::SimplifyDemandedBits( } // Other users may use these bits. - if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { - if (Depth != 0) { - // If not at the root, Just compute the Known bits to - // simplify things downstream. - Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + bool HasMultiUse = false; + if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) { + if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } - // If this is the root being simplified, allow it to have multiple uses, - // just set the DemandedBits/Elts to all bits. + // Allow multiple uses, just set the DemandedBits/Elts to all bits. DemandedBits = APInt::getAllOnes(BitWidth); DemandedElts = APInt::getAllOnes(NumElts); + HasMultiUse = true; } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1124,9 +1125,9 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known2; switch (Op.getOpcode()) { - case ISD::TargetConstant: - llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { + if (VT.isScalableVector()) + return false; if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1164,6 +1165,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::INSERT_VECTOR_ELT: { + if (VT.isScalableVector()) + return false; SDValue Vec = Op.getOperand(0); SDValue Scl = Op.getOperand(1); auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); @@ -1200,6 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits( return false; } case ISD::INSERT_SUBVECTOR: { + if (VT.isScalableVector()) + return false; // Demand any elements from the subvector and the remainder from the src its // inserted into. SDValue Src = Op.getOperand(0); @@ -1243,6 +1248,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::EXTRACT_SUBVECTOR: { + if (VT.isScalableVector()) + return false; // Offset the demanded elts by the subvector index. 
SDValue Src = Op.getOperand(0); if (Src.getValueType().isScalableVector()) @@ -1268,6 +1275,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::CONCAT_VECTORS: { + if (VT.isScalableVector()) + return false; Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVT = Op.getOperand(0).getValueType(); @@ -1286,28 +1295,14 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::VECTOR_SHUFFLE: { + assert(!VT.isScalableVector()); ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); // Collect demanded elements from shuffle operands.. - APInt DemandedLHS(NumElts, 0); - APInt DemandedRHS(NumElts, 0); - for (unsigned i = 0; i != NumElts; ++i) { - if (!DemandedElts[i]) - continue; - int M = ShuffleMask[i]; - if (M < 0) { - // For UNDEF elements, we don't know anything about the common state of - // the shuffle result. - DemandedLHS.clearAllBits(); - DemandedRHS.clearAllBits(); - break; - } - assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); - if (M < (int)NumElts) - DemandedLHS.setBit(M); - else - DemandedRHS.setBit(M - NumElts); - } + APInt DemandedLHS, DemandedRHS; + if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS, + DemandedRHS)) + break; if (!!DemandedLHS || !!DemandedRHS) { SDValue Op0 = Op.getOperand(0); @@ -1378,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits( // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). - if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && + if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() && (Op0.getOperand(0).isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) && Op0->hasOneUse()) { @@ -1745,7 +1740,7 @@ bool TargetLowering::SimplifyDemandedBits( // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. // TODO - support non-uniform vector amounts. - if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && + if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() && InnerOp.hasOneUse()) { if (const APInt *SA2 = TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) { @@ -1879,6 +1874,16 @@ bool TargetLowering::SimplifyDemandedBits( Known.One.lshrInPlace(ShAmt); // High bits known zero. Known.Zero.setHighBits(ShAmt); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } } break; } @@ -2081,10 +2086,10 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umin(Known0, Known1); - if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1)) - return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1); - if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1)) - return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1); + if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1)) + return TLO.CombineTo(Op, *IsULE ? Op0 : Op1); + if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1)) + return TLO.CombineTo(Op, *IsULT ? 
Op0 : Op1); break; } case ISD::UMAX: { @@ -2094,10 +2099,10 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umax(Known0, Known1); - if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) - return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1); - if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) - return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1); + if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) + return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1); + if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) + return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1); break; } case ISD::BITREVERSE: { @@ -2225,19 +2230,18 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) return true; - Known.Zero = KnownLo.Zero.zext(BitWidth) | - KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); - - Known.One = KnownLo.One.zext(BitWidth) | - KnownHi.One.zext(BitWidth).shl(HalfBitWidth); + Known = KnownHi.concat(KnownLo); break; } - case ISD::ZERO_EXTEND: - case ISD::ZERO_EXTEND_VECTOR_INREG: { + case ISD::ZERO_EXTEND_VECTOR_INREG: + if (VT.isScalableVector()) + return false; + [[fallthrough]]; + case ISD::ZERO_EXTEND: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned InBits = SrcVT.getScalarSizeInBits(); - unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; // If none of the top bits are demanded, convert this into an any_extend. @@ -2269,12 +2273,15 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } - case ISD::SIGN_EXTEND: - case ISD::SIGN_EXTEND_VECTOR_INREG: { + case ISD::SIGN_EXTEND_VECTOR_INREG: + if (VT.isScalableVector()) + return false; + [[fallthrough]]; + case ISD::SIGN_EXTEND: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned InBits = SrcVT.getScalarSizeInBits(); - unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; // If none of the top bits are demanded, convert this into an any_extend. @@ -2321,12 +2328,15 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } - case ISD::ANY_EXTEND: - case ISD::ANY_EXTEND_VECTOR_INREG: { + case ISD::ANY_EXTEND_VECTOR_INREG: + if (VT.isScalableVector()) + return false; + [[fallthrough]]; + case ISD::ANY_EXTEND: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned InBits = SrcVT.getScalarSizeInBits(); - unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG; // If we only need the bottom element then we can just bitcast. @@ -2369,18 +2379,18 @@ bool TargetLowering::SimplifyDemandedBits( // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
- if (Src.getNode()->hasOneUse()) { - switch (Src.getOpcode()) { - default: + switch (Src.getOpcode()) { + default: + break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. break; - case ISD::SRL: - // Shrink SRL by a constant if none of the high bits shifted in are - // demanded. - if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) - // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is - // undesirable. - break; + if (Src.getNode()->hasOneUse()) { const APInt *ShAmtC = TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); if (!ShAmtC || ShAmtC->uge(BitWidth)) @@ -2402,8 +2412,8 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo( Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt)); } - break; } + break; } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -2420,6 +2430,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero |= ~InMask; + Known.One &= (~Known.Zero); break; } case ISD::EXTRACT_VECTOR_ELT: { @@ -2464,6 +2475,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::BITCAST: { + if (VT.isScalableVector()) + return false; SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); @@ -2576,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedBits( SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One); return TLO.CombineTo(Op, And1); } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that @@ -2601,6 +2614,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // neg x with only low bit demanded is simply x. + if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() && + isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero()) + return TLO.CombineTo(Op, Op1); + // Attempt to avoid multi-use ops if we don't need anything from them. if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( @@ -2679,10 +2697,16 @@ bool TargetLowering::SimplifyDemandedBits( } } - LLVM_FALLTHROUGH; + [[fallthrough]]; } default: - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + // We also ask the target about intrinsics (which could be specific to it). + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + // TODO: Probably okay to remove after audit; here to reduce change size + // in initial enablement patch for scalable vectors + if (Op.getValueType().isScalableVector()) + break; if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts, Known, TLO, Depth)) return true; @@ -2715,6 +2739,12 @@ bool TargetLowering::SimplifyDemandedBits( APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT)); } + // A multi use 'all demanded elts' simplify failed to find any knownbits. + // Try again just for the original demanded elts. + // Ensure we do this AFTER constant folding above. 
+ if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes()) + Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth); + return false; } @@ -2746,7 +2776,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, "Vector binop only"); EVT EltVT = VT.getVectorElementType(); - unsigned NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1; assert(UndefOp0.getBitWidth() == NumElts && UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis"); @@ -2814,7 +2844,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // If Op has other users, assume that all elements are needed. - if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) + if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) DemandedElts.setAllBits(); // Not demanding any elements from Op. @@ -3176,6 +3206,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::VECTOR_SHUFFLE: { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); // Collect demanded elements from shuffle operands.. @@ -3195,17 +3227,17 @@ bool TargetLowering::SimplifyDemandedVectorElts( // See if we can simplify either shuffle operand. APInt UndefLHS, ZeroLHS; APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; // Simplify mask using undef elements from LHS/RHS. bool Updated = false; bool IdentityLHS = true, IdentityRHS = true; - SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); + SmallVector<int, 32> NewMask(ShuffleMask); for (unsigned i = 0; i != NumElts; ++i) { int &M = NewMask[i]; if (M < 0) @@ -3223,8 +3255,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // to Identity which can cause premature removal of the shuffle mask. if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { SDValue LegalShuffle = - buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), - NewMask, TLO.DAG); + buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG); if (LegalShuffle) return TLO.CombineTo(Op, LegalShuffle); } @@ -3307,7 +3338,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( Depth + 1, /*AssumeSingleUse*/ true)) return true; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::OR: case ISD::XOR: @@ -3367,6 +3398,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::MUL: + case ISD::MULHU: + case ISD::MULHS: case ISD::AND: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -3375,10 +3408,16 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero, + // If we know that a demanded element was zero in Op1 we don't need to + // demand it in Op0 - its guaranteed to be zero. 
+ APInt DemandedElts0 = DemandedElts & ~SrcZero; + if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero, TLO, Depth + 1)) return true; + KnownUndef &= DemandedElts0; + KnownZero &= DemandedElts0; + // If every element pair has a zero/undef then just fold to zero. // fold (and x, undef) -> 0 / (and x, 0) -> 0 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0 @@ -3566,6 +3605,19 @@ bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( return false; } +bool TargetLowering::canCreateUndefOrPoisonForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use canCreateUndefOrPoison if you don't know whether Op" + " is a target node!"); + // Be conservative and return true. + return true; +} + bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN, @@ -3582,6 +3634,7 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, bool TargetLowering::isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, + const SelectionDAG &DAG, unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || @@ -3692,6 +3745,26 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT); } + // Try to eliminate a power-of-2 mask constant by converting to a signbit + // test in a narrow type that we can truncate to with no cost. Examples: + // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0 + // (i32 X & 32768) != 0 --> (trunc X to i16) < 0 + // TODO: This conservatively checks for type legality on the source and + // destination types. That may inhibit optimizations, but it also + // allows setcc->shift transforms that may be more beneficial. + auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() && + isTypeLegal(OpVT) && N0.hasOneUse()) { + EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), + AndC->getAPIntValue().getActiveBits()); + if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) { + SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT); + SDValue Zero = DAG.getConstant(0, DL, NarrowVT); + return DAG.getSetCC(DL, VT, Trunc, Zero, + Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT); + } + } + // Match these patterns in any of their permutations: // (X & Y) == Y // (X & Y) != Y @@ -3968,14 +4041,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, EVT CTVT = CTPOP.getValueType(); SDValue CTOp = CTPOP.getOperand(0); - // If this is a vector CTPOP, keep the CTPOP if it is legal. - // TODO: Should we check if CTPOP is legal(or custom) for scalars? - if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) - return SDValue(); - + // Expand a power-of-2-or-zero comparison based on ctpop: // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { + // Keep the CTPOP if it is a legal vector op. 
+ if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) + return SDValue(); + unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); if (C1.ugt(CostLimit + (Cond == ISD::SETULT))) return SDValue(); @@ -3994,16 +4067,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); } - // If ctpop is not supported, expand a power-of-2 comparison based on it. + // Expand a power-of-2 comparison based on ctpop: + // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) + // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { - // For scalars, keep CTPOP if it is legal or custom. - if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT)) + // Keep the CTPOP if it is legal. + if (TLI.isOperationLegal(ISD::CTPOP, CTVT)) return SDValue(); - // This is based on X86's custom lowering for CTPOP which produces more - // instructions than the expansion here. - // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) - // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); assert(CTVT.isInteger()); @@ -4137,6 +4208,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG = DCI.DAG; const DataLayout &Layout = DAG.getDataLayout(); EVT OpVT = N0.getValueType(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // Constant fold or commute setcc. if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) @@ -4181,6 +4253,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG)) return V; + // For equality to 0 of a no-wrap multiply, decompose and test each op: + // X * Y == 0 --> (X == 0) || (Y == 0) + // X * Y != 0 --> (X != 0) && (Y != 0) + // TODO: This bails out if minsize is set, but if the target doesn't have a + // single instruction multiply for this type, it would likely be + // smaller to decompose. + if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::MUL && N0.hasOneUse() && + (N0->getFlags().hasNoUnsignedWrap() || + N0->getFlags().hasNoSignedWrap()) && + !Attr.hasFnAttr(Attribute::MinSize)) { + SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); + SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond); + unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND; + return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero); + } + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. @@ -4970,8 +5059,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold remainder of division by a constant. if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. 
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) { @@ -5221,6 +5308,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } +void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I, + SmallVectorImpl<SDValue> &Ops, + SelectionDAG &DAG) const { + return; +} + std::pair<unsigned, const TargetRegisterClass *> TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, StringRef Constraint, @@ -5334,11 +5427,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; case InlineAsm::isLabel: - OpInfo.CallOperandVal = - cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo); - OpInfo.ConstraintVT = - getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType()) - .getSimpleVT(); + OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo); ++LabelNo; continue; case InlineAsm::isClobber: @@ -5944,54 +6033,68 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); } - bool UseNPQ = false; + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Try to use leading zeros of the dividend to reduce the multiplier and + // avoid expensive fixups. + // TODO: Support vectors. + unsigned LeadingZeros = 0; + if (!VT.isVector() && isa<ConstantSDNode>(N1)) { + assert(!isOneConstant(N1) && "Unexpected divisor"); + LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); + // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in + // the dividend exceeds the leading zeros for the divisor. + LeadingZeros = + std::min(LeadingZeros, + cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros()); + } + + bool UseNPQ = false, UsePreShift = false, UsePostShift = false; SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; auto BuildUDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; - // FIXME: We should use a narrower constant when the upper - // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - UnsignedDivisionByConstantInfo magics = - UnsignedDivisionByConstantInfo::get(Divisor); - unsigned PreShift = 0, PostShift = 0; - - // If the divisor is even, we can avoid using the expensive fixup by - // shifting the divided value upfront. - if (magics.IsAdd && !Divisor[0]) { - PreShift = Divisor.countTrailingZeros(); - // Get magic number for the shifted divisor. - magics = - UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(!magics.IsAdd && "Should use cheap fixup now"); - } - - unsigned SelNPQ; - if (!magics.IsAdd || Divisor.isOne()) { - assert(magics.ShiftAmount < Divisor.getBitWidth() && - "We shouldn't generate an undefined shift!"); - PostShift = magics.ShiftAmount; - SelNPQ = false; + + SDValue PreShift, MagicFactor, NPQFactor, PostShift; + + // Magic algorithm doesn't work for division by 1. We need to emit a select + // at the end. + if (Divisor.isOne()) { + PreShift = PostShift = DAG.getUNDEF(ShSVT); + MagicFactor = NPQFactor = DAG.getUNDEF(SVT); } else { - PostShift = magics.ShiftAmount - 1; - SelNPQ = true; - } + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros); - PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); - MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); - NPQFactors.push_back( - DAG.getConstant(SelNPQ ? 
APInt::getOneBitSet(EltBits, EltBits - 1) - : APInt::getZero(EltBits), - dl, SVT)); - PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); - UseNPQ |= SelNPQ; + MagicFactor = DAG.getConstant(magics.Magic, dl, SVT); + + assert(magics.PreShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert(magics.PostShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert((!magics.IsAdd || magics.PreShift == 0) && + "Unexpected pre-shift"); + PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT); + PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT); + NPQFactor = DAG.getConstant( + magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1) + : APInt::getZero(EltBits), + dl, SVT); + UseNPQ |= magics.IsAdd; + UsePreShift |= magics.PreShift != 0; + UsePostShift |= magics.PostShift != 0; + } + + PreShifts.push_back(PreShift); + MagicFactors.push_back(MagicFactor); + NPQFactors.push_back(NPQFactor); + PostShifts.push_back(PostShift); return true; }; - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - // Collect the shifts/magic values from each element. if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern)) return SDValue(); @@ -6018,8 +6121,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, } SDValue Q = N0; - Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift); - Created.push_back(Q.getNode()); + if (UsePreShift) { + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift); + Created.push_back(Q.getNode()); + } // FIXME: We should support doing a MUL in a wider type. auto GetMULHU = [&](SDValue X, SDValue Y) { @@ -6068,8 +6173,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, Created.push_back(Q.getNode()); } - Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); - Created.push_back(Q.getNode()); + if (UsePostShift) { + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); + Created.push_back(Q.getNode()); + } EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); @@ -6921,6 +7028,41 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, OptForSize, Cost, Depth)) return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1)); break; + case ISD::SELECT: + case ISD::VSELECT: { + // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS)) + // iff at least one cost is cheaper and the other is neutral/cheaper + SDValue LHS = Op.getOperand(1); + NegatibleCost CostLHS = NegatibleCost::Expensive; + SDValue NegLHS = + getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth); + if (!NegLHS || CostLHS > NegatibleCost::Neutral) { + RemoveDeadNode(NegLHS); + break; + } + + // Prevent this node from being deleted by the next call. + Handles.emplace_back(NegLHS); + + SDValue RHS = Op.getOperand(2); + NegatibleCost CostRHS = NegatibleCost::Expensive; + SDValue NegRHS = + getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth); + + // We're done with the handles. 
+ Handles.clear(); + + if (!NegRHS || CostRHS > NegatibleCost::Neutral || + (CostLHS != NegatibleCost::Cheaper && + CostRHS != NegatibleCost::Cheaper)) { + RemoveDeadNode(NegLHS); + RemoveDeadNode(NegRHS); + break; + } + + Cost = std::min(CostLHS, CostRHS); + return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS); + } } return SDValue(); @@ -7002,8 +7144,8 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, } if (!VT.isVector() && Opcode == ISD::MUL && - DAG.ComputeNumSignBits(LHS) > InnerBitSize && - DAG.ComputeNumSignBits(RHS) > InnerBitSize) { + DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize && + DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) { // The input values are both sign-extended. // TODO non-MUL case? if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { @@ -7014,8 +7156,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, } unsigned ShiftAmount = OuterBitSize - InnerBitSize; - EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); - SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); + SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl); if (!LH.getNode() && !RH.getNode() && isOperationLegalOrCustom(ISD::SRL, VT) && @@ -7122,6 +7263,190 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, return Ok; } +// Optimize unsigned division or remainder by constants for types twice as large +// as a legal VT. +// +// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder +// can be computed +// as: +// Sum += __builtin_uadd_overflow(Lo, High, &Sum); +// Remainder = Sum % Constant +// This is based on "Remainder by Summing Digits" from Hacker's Delight. +// +// For division, we can compute the remainder using the algorithm described +// above, subtract it from the dividend to get an exact multiple of Constant. +// Then multiply that extact multiply by the multiplicative inverse modulo +// (1 << (BitWidth / 2)) to get the quotient. + +// If Constant is even, we can shift right the dividend and the divisor by the +// number of trailing zeros in Constant before applying the remainder algorithm. +// If we're after the quotient, we can subtract this value from the shifted +// dividend and multiply by the multiplicative inverse of the shifted divisor. +// If we want the remainder, we shift the value left by the number of trailing +// zeros and add the bits that were shifted out of the dividend. +bool TargetLowering::expandDIVREMByConstant(SDNode *N, + SmallVectorImpl<SDValue> &Result, + EVT HiLoVT, SelectionDAG &DAG, + SDValue LL, SDValue LH) const { + unsigned Opcode = N->getOpcode(); + EVT VT = N->getValueType(0); + + // TODO: Support signed division/remainder. + if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM) + return false; + assert( + (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) && + "Unexpected opcode"); + + auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!CN) + return false; + + APInt Divisor = CN->getAPIntValue(); + unsigned BitWidth = Divisor.getBitWidth(); + unsigned HBitWidth = BitWidth / 2; + assert(VT.getScalarSizeInBits() == BitWidth && + HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs"); + + // Divisor needs to less than (1 << HBitWidth). + APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth); + if (Divisor.uge(HalfMaxPlus1)) + return false; + + // We depend on the UREM by constant optimization in DAGCombiner that requires + // high multiply. 
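In scalar terms, the summing trick described in the block comment above looks as follows; a minimal sketch for a 64-bit dividend split into 32-bit halves, with divisor 5 chosen because (1 << 32) % 5 == 1:

#include <cassert>
#include <cstdint>

// 2^32 == 1 (mod 5), so Hi * 2^32 + Lo == Hi + Lo (mod 5); a carry out of
// the 32-bit add represents another 2^32 and is folded back the same way.
static uint32_t rem5(uint64_t X) {
  uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  uint32_t Sum = Lo + Hi;
  Sum += (Sum < Lo);   // add the carry; this cannot overflow again
  return Sum % 5;      // a half-width urem, handled by the MULHU-based fold
}

int main() {
  assert(rem5(0xFFFFFFFFFFFFFFFFull) == 0xFFFFFFFFFFFFFFFFull % 5);
  assert(rem5(1234567890123456789ull) == 1234567890123456789ull % 5);
}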
+ if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) && + !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT)) + return false; + + // Don't expand if optimizing for size. + if (DAG.shouldOptForSize()) + return false; + + // Early out for 0 or 1 divisors. + if (Divisor.ule(1)) + return false; + + // If the divisor is even, shift it until it becomes odd. + unsigned TrailingZeros = 0; + if (!Divisor[0]) { + TrailingZeros = Divisor.countTrailingZeros(); + Divisor.lshrInPlace(TrailingZeros); + } + + SDLoc dl(N); + SDValue Sum; + SDValue PartialRem; + + // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and + // then add in the carry. + // TODO: If we can't split it in half, we might be able to split into 3 or + // more pieces using a smaller bit width. + if (HalfMaxPlus1.urem(Divisor).isOneValue()) { + assert(!LL == !LH && "Expected both input halves or no input halves!"); + if (!LL) { + LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0), + DAG.getIntPtrConstant(0, dl)); + LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0), + DAG.getIntPtrConstant(1, dl)); + } + + // Shift the input by the number of TrailingZeros in the divisor. The + // shifted out bits will be added to the remainder later. + if (TrailingZeros) { + // Save the shifted off bits if we need the remainder. + if (Opcode != ISD::UDIV) { + APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros); + PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL, + DAG.getConstant(Mask, dl, HiLoVT)); + } + + LL = DAG.getNode( + ISD::OR, dl, HiLoVT, + DAG.getNode(ISD::SRL, dl, HiLoVT, LL, + DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)), + DAG.getNode(ISD::SHL, dl, HiLoVT, LH, + DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, + HiLoVT, dl))); + LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH, + DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)); + } + + // Use addcarry if we can, otherwise use a compare to detect overflow. + EVT SetCCType = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT); + if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) { + SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType); + Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH); + Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum, + DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1)); + } else { + Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH); + SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT); + // If the boolean for the target is 0 or 1, we can add the setcc result + // directly. + if (getBooleanContents(HiLoVT) == + TargetLoweringBase::ZeroOrOneBooleanContent) + Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT); + else + Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT), + DAG.getConstant(0, dl, HiLoVT)); + Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry); + } + } + + // If we didn't find a sum, we can't do the expansion. + if (!Sum) + return false; + + // Perform a HiLoVT urem on the Sum using truncated divisor. + SDValue RemL = + DAG.getNode(ISD::UREM, dl, HiLoVT, Sum, + DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT)); + SDValue RemH = DAG.getConstant(0, dl, HiLoVT); + + if (Opcode != ISD::UREM) { + // Subtract the remainder from the shifted dividend. + SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH); + SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH); + + Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem); + + // Multiply by the multiplicative inverse of the divisor modulo + // (1 << BitWidth). 
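The quotient path relies on the fact that, for an odd divisor, dividing an exact multiple is just a multiply by the inverse modulo 2^BitWidth; a standalone 64-bit sketch of that identity (the code below computes the same inverse with APInt on the zero-extended divisor):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t D = 10001;        // odd divisor
  uint64_t Inv = D;                // Newton-Raphson inverse modulo 2^64
  for (int i = 0; i < 6; ++i)
    Inv *= 2 - D * Inv;
  assert(D * Inv == 1);            // exact inverse in 64-bit arithmetic

  uint64_t X = 123456789ull * D;   // an exact multiple of D
  assert(X * Inv == 123456789ull); // quotient recovered without a division
}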
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1); + APInt MulFactor = Divisor.zext(BitWidth + 1); + MulFactor = MulFactor.multiplicativeInverse(Mod); + MulFactor = MulFactor.trunc(BitWidth); + + SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend, + DAG.getConstant(MulFactor, dl, VT)); + + // Split the quotient into low and high parts. + SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient, + DAG.getIntPtrConstant(0, dl)); + SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient, + DAG.getIntPtrConstant(1, dl)); + Result.push_back(QuotL); + Result.push_back(QuotH); + } + + if (Opcode != ISD::UDIV) { + // If we shifted the input, shift the remainder left and add the bits we + // shifted off the input. + if (TrailingZeros) { + APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros); + RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL, + DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)); + RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem); + } + Result.push_back(RemL); + Result.push_back(DAG.getConstant(0, dl, HiLoVT)); + } + + return true; +} + // Check that (every element of) Z is undef or not an exact multiple of BW. static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { return ISD::matchUnaryPredicate( @@ -7130,8 +7455,68 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { true); } +static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) { + EVT VT = Node->getValueType(0); + SDValue ShX, ShY; + SDValue ShAmt, InvShAmt; + SDValue X = Node->getOperand(0); + SDValue Y = Node->getOperand(1); + SDValue Z = Node->getOperand(2); + SDValue Mask = Node->getOperand(3); + SDValue VL = Node->getOperand(4); + + unsigned BW = VT.getScalarSizeInBits(); + bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL; + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Z.getValueType(); + if (isNonZeroModBitWidthOrUndef(Z, BW)) { + // fshl: X << C | Y >> (BW - C) + // fshr: X << (BW - C) | Y >> C + // where C = Z % BW is not zero + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); + ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL); + InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL); + ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask, + VL); + ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? 
InvShAmt : ShAmt, Mask, + VL); + } else { + // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) + // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) + SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT); + if (isPowerOf2_32(BW)) { + // Z % BW -> Z & (BW - 1) + ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL); + // (BW - 1) - (Z % BW) -> ~Z & (BW - 1) + SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z, + DAG.getAllOnesConstant(DL, ShVT), Mask, VL); + InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL); + } else { + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); + ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL); + InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL); + } + + SDValue One = DAG.getConstant(1, DL, ShVT); + if (IsFSHL) { + ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL); + SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL); + ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL); + } else { + SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL); + ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL); + ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL); + } + } + return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL); +} + SDValue TargetLowering::expandFunnelShift(SDNode *Node, SelectionDAG &DAG) const { + if (Node->isVPOpcode()) + return expandVPFunnelShift(Node, DAG); + EVT VT = Node->getValueType(0); if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || @@ -7919,6 +8304,63 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getConstant(Len - 8, dl, ShVT)); } +SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + unsigned Len = VT.getScalarSizeInBits(); + assert(VT.isInteger() && "VP_CTPOP not implemented for this type."); + + // TODO: Add support for irregular type lengths. + if (!(Len <= 128 && Len % 8 == 0)) + return SDValue(); + + // This is same algorithm of expandCTPOP from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + SDValue Mask55 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); + SDValue Mask33 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); + SDValue Mask0F = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); + + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5; + + // v = v - ((v >> 1) & 0x55555555...) + Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT, + DAG.getNode(ISD::VP_LSHR, dl, VT, Op, + DAG.getConstant(1, dl, ShVT), Mask, VL), + Mask55, Mask, VL); + Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL); + + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, + DAG.getNode(ISD::VP_LSHR, dl, VT, Op, + DAG.getConstant(2, dl, ShVT), Mask, VL), + Mask33, Mask, VL); + Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL); + + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT), + Mask, VL), + Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL); + Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL); + + if (Len <= 8) + return Op; + + // v = (v * 0x01010101...) 
>> (Len - 8) + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + return DAG.getNode(ISD::VP_LSHR, dl, VT, + DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL), + DAG.getConstant(Len - 8, dl, ShVT), Mask, VL); +} + SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); @@ -7969,6 +8411,77 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::CTPOP, dl, VT, Op); } +SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) { + SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); + Op = DAG.getNode(ISD::VP_OR, dl, VT, Op, + DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask, + VL); + } + Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask, + VL); + return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL); +} + +SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG, + const SDLoc &DL, EVT VT, SDValue Op, + unsigned BitWidth) const { + if (BitWidth != 32 && BitWidth != 64) + return SDValue(); + APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U) + : APInt(64, 0x0218A392CD3D5DBFULL); + const DataLayout &TD = DAG.getDataLayout(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); + unsigned ShiftAmt = BitWidth - Log2_32(BitWidth); + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); + SDValue Lookup = DAG.getNode( + ISD::SRL, DL, VT, + DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg), + DAG.getConstant(DeBruijn, DL, VT)), + DAG.getConstant(ShiftAmt, DL, VT)); + Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD)); + + SmallVector<uint8_t> Table(BitWidth, 0); + for (unsigned i = 0; i < BitWidth; i++) { + APInt Shl = DeBruijn.shl(i); + APInt Lshr = Shl.lshr(ShiftAmt); + Table[Lshr.getZExtValue()] = i; + } + + // Create a ConstantArray in Constant Pool + auto *CA = ConstantDataArray::get(*DAG.getContext(), Table); + SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD), + TD.getPrefTypeAlign(CA->getType())); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(), + DAG.getMemBasePlusOffset(CPIdx, Lookup, DL), + PtrInfo, MVT::i8); + if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) + return ExtLoad; + + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ); + return DAG.getSelect(DL, VT, SrcIsZero, + DAG.getConstant(BitWidth, DL, VT), ExtLoad); +} + SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); @@ -8002,6 +8515,12 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal. 
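The lookup emitted above is the classic de Bruijn multiply; a standalone 32-bit version that builds the table the same way the ConstantDataArray is built and then indexes it with the isolated low bit:

#include <cassert>
#include <cstdint>

int main() {
  // Same table CTTZTableLookup materializes for the constant 0x077CB531.
  uint8_t Table[32];
  for (unsigned i = 0; i < 32; ++i)
    Table[(0x077CB531u << i) >> 27] = (uint8_t)i;

  auto cttz32 = [&](uint32_t x) {     // x must be nonzero; the patch selects
    uint32_t LowBit = x & (0u - x);   // BitWidth separately for x == 0
    return Table[(LowBit * 0x077CB531u) >> 27];
  };

  for (unsigned i = 0; i < 32; ++i)
    assert(cttz32(1u << i) == i);
  assert(cttz32(0xF0u) == 4);
}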
+ if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) && + !isOperationLegal(ISD::CTLZ, VT)) + if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt)) + return V; + // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } @@ -8019,6 +8538,22 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } +SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const { + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1)) + SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op, + DAG.getConstant(-1, dl, VT), Mask, VL); + SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op, + DAG.getConstant(1, dl, VT), Mask, VL); + SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL); + return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL); +} + SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); @@ -8092,36 +8627,36 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); case MVT::i32: Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(0xFF00, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, - DAG.getConstant(0xFF0000, dl, VT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); - Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); - Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(255ULL<<8, dl, VT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT)); Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); - Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, - DAG.getConstant(255ULL<<48, dl, VT)); - Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, - DAG.getConstant(255ULL<<40, dl, VT)); - Tmp5 = DAG.getNode(ISD::AND, dl, VT, 
Tmp5, - DAG.getConstant(255ULL<<32, dl, VT)); Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, - DAG.getConstant(255ULL<<8 , dl, VT)); + DAG.getConstant(255ULL<<8, dl, VT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); @@ -8132,6 +8667,82 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { } } +SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + + if (!VT.isSimple()) + return SDValue(); + + EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().getScalarType().SimpleTy) { + default: + return SDValue(); + case MVT::i16: + Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(0xFF00, dl, VT), Mask, EVL); + Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL); + return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT), + Mask, EVL); + Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op, + DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL); + Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT), + Mask, EVL); + Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op, + DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL); + Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op, + DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL); + Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4, + DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3, + DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(255ULL << 8, dl, VT), 
Mask, EVL); + Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT), + Mask, EVL); + Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL); + Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL); + Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL); + return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL); + } +} + SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -8194,6 +8805,68 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { return Tmp; } +SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const { + assert(N->getOpcode() == ISD::VP_BITREVERSE); + + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55)); + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op); + + // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(Mask4, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT), + Mask, EVL); + Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); + + // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(Mask2, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT), + Mask, EVL); + Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); + + // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(Mask1, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT), + Mask, EVL); + Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); + return Tmp; + } + return SDValue(); +} + std::pair<SDValue, SDValue> TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const { @@ -8671,7 +9344,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, } else if (DataVT.isScalableVector()) { Increment = 
DAG.getVScale(DL, AddrVT, APInt(AddrVT.getFixedSizeInBits(), - DataVT.getStoreSize().getKnownMinSize())); + DataVT.getStoreSize().getKnownMinValue())); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); @@ -8957,9 +9630,13 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { assert(VT == RHS.getValueType() && "Expected operands to be the same type"); assert(VT.isInteger() && "Expected operands to be integers"); + if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. unsigned BW = VT.getScalarSizeInBits(); + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); SDValue Orig = DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS); @@ -8968,14 +9645,14 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { if (IsSigned) { SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); - SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), - SatMin, SatMax, ISD::SETLT); + SDValue Cond = + DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT); + SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax); } else { SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); } - Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE); - - return Result; + SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE); + return DAG.getSelect(dl, VT, Cond, SatVal, Result); } SDValue @@ -9665,7 +10342,7 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, // Store the hi part of CONCAT_VECTORS(V1, V2) SDValue OffsetToV2 = DAG.getVScale( DL, PtrVT, - APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize())); + APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue())); SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2); SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo); @@ -9686,9 +10363,10 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT); if (TrailingElts > VT.getVectorMinNumElements()) { - SDValue VLBytes = DAG.getVScale( - DL, PtrVT, - APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize())); + SDValue VLBytes = + DAG.getVScale(DL, PtrVT, + APInt(PtrVT.getFixedSizeInBits(), + VT.getStoreSize().getKnownMinValue())); TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes); } @@ -9757,7 +10435,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETUE is expanded, SETOEQ or SETUNE must be legal!"); NeedInvert = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETO: assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); @@ -9781,7 +10459,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, NeedInvert = ((unsigned)CCCode & 0x8U); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: @@ -9802,7 +10480,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, break; } // Fallthrough if we are unsigned integer. 
- LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETLE: case ISD::SETGT: case ISD::SETGE: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 5f9ade18f15c..153fe77b8b4a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include <cassert> +#include <optional> #include <string> #include <utility> #include <vector> @@ -305,7 +306,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { if (Roots.empty()) return false; - Optional<DomTreeUpdater> DTU; + std::optional<DomTreeUpdater> DTU; if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); @@ -320,9 +321,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); - while (isa<AllocaInst>(IP)) - ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); + AtEntry.SetInsertPointPastAllocas(&F); + IP = AtEntry.GetInsertPoint(); // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = @@ -361,7 +361,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, - DTU ? DTU.getPointer() : nullptr); + DTU ? &*DTU : nullptr); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp index f6ad2b50abcd..2411b1ad5203 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -284,7 +284,7 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, Register PhysReg = MO.getReg(); if (!PhysReg) continue; - assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!"); + assert(PhysReg.isPhysical() && "Unallocated register?!"); // The stack pointer is not normally described as a callee-saved register // in calling convention definitions, so we need to watch for it // separately. An SP mentioned by a call instruction, we can ignore, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 1fcee02184a9..3fed707a9eb1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -164,7 +164,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, // There are still some uses of LPI. Construct an aggregate with the exception // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); - Value *LPadVal = UndefValue::get(LPadType); + Value *LPadVal = PoisonValue::get(LPadType); auto *SelI = cast<Instruction>(SelVal); IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator())); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); @@ -183,7 +183,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // that needs to be restored on all exits from the function. 
This is an alloca // because the value needs to be added to the global context list. auto &DL = F.getParent()->getDataLayout(); - const Align Alignment(DL.getPrefTypeAlignment(FunctionContextTy)); + const Align Alignment = DL.getPrefTypeAlign(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr, Alignment, "fn_context", &EntryBB->front()); @@ -391,7 +391,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = - setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); + setupFunctionContext(F, ArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = &F.front(); IRBuilder<> Builder(EntryBB->getTerminator()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp index 94149f56e703..92e820c9d3d8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SplitKit.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -323,7 +322,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { } bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { - unsigned OrigReg = VRM.getOriginal(CurLI->reg()); + Register OrigReg = VRM.getOriginal(CurLI->reg()); const LiveInterval &Orig = LIS.getInterval(OrigReg); assert(!Orig.empty() && "Splitting empty interval?"); LiveInterval::const_iterator I = Orig.find(Idx); @@ -590,7 +589,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI, bool Late = RegIdx != 0; // Attempt cheap-as-a-copy rematerialization. - unsigned Original = VRM.getOriginal(Edit->get(RegIdx)); + Register Original = VRM.getOriginal(Edit->get(RegIdx)); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); @@ -1450,7 +1449,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, None); + Edit->eliminateDeadDefs(Dead, std::nullopt); } void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h index 556b022b93fb..5a3428a5e91f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h @@ -32,7 +32,6 @@ namespace llvm { -class AAResults; class LiveInterval; class LiveRange; class LiveIntervals; @@ -488,7 +487,7 @@ public: /// overlapIntv - Indicate that all instructions in range should use the open /// interval if End does not have tied-def usage of the register and in this - /// case compliment interval is used. Let the complement interval be live. + /// case complement interval is used. Let the complement interval be live. /// /// This doubles the register pressure, but is sometimes required to deal with /// register uses after the last valid split point. 
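Several hunks above (ShadowStackGCLowering, SplitKit) apply the same mechanical migration from llvm::Optional/None to the standard library; the pattern in isolation, using a stand-in type instead of DomTreeUpdater:

#include <optional>

struct UpdaterStub { void flush() {} };      // stand-in for DomTreeUpdater

int main() {
  std::optional<UpdaterStub> DTU;            // was llvm::Optional<...>
  DTU.emplace();
  UpdaterStub *Raw = DTU ? &*DTU : nullptr;  // was DTU ? DTU.getPointer() : nullptr
  if (Raw)
    Raw->flush();
}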
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp new file mode 100644 index 000000000000..3a48dd5b0a03 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -0,0 +1,253 @@ +//===-- StackFrameLayoutAnalysisPass.cpp +//------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// StackFrameLayoutAnalysisPass implementation. Outputs information about the +// layout of the stack frame, using the remarks interface. On the CLI it prints +// a textual representation of the stack frame. When possible it prints the +// values that occupy a stack slot using any available debug information. Since +// output is remarks based, it is also available in a machine readable file +// format, such as YAML. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/PrintPasses.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" + +#include <sstream> + +using namespace llvm; + +#define DEBUG_TYPE "stack-frame-layout" + +namespace { + +/// StackFrameLayoutAnalysisPass - This is a pass to dump the stack frame of a +/// MachineFunction. 
+/// +struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { + using SlotDbgMap = SmallDenseMap<int, SetVector<const DILocalVariable *>>; + static char ID; + + enum SlotType { + Spill, // a Spill slot + StackProtector, // Stack Protector slot + Variable, // a slot used to store a local data (could be a tmp) + Invalid // It's an error for a slot to have this type + }; + + struct SlotData { + int Slot; + int Size; + int Align; + int Offset; + SlotType SlotTy; + + SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx) + : Slot(Idx), Size(MFI.getObjectSize(Idx)), + Align(MFI.getObjectAlign(Idx).value()), + Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid) { + if (MFI.isSpillSlotObjectIndex(Idx)) + SlotTy = SlotType::Spill; + else if (Idx == MFI.getStackProtectorIndex()) + SlotTy = SlotType::StackProtector; + else + SlotTy = SlotType::Variable; + } + + // we use this to sort in reverse order, so that the layout is displayed + // correctly + bool operator<(const SlotData &Rhs) const { return Offset > Rhs.Offset; } + }; + + StackFrameLayoutAnalysisPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Stack Frame Layout Analysis"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineOptimizationRemarkEmitterPass>(); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + // TODO: We should implement a similar filter for remarks: + // -Rpass-func-filter=<regex> + if (!isFunctionInPrintList(MF.getName())) + return false; + + LLVMContext &Ctx = MF.getFunction().getContext(); + if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(DEBUG_TYPE)) + return false; + + MachineOptimizationRemarkAnalysis Rem(DEBUG_TYPE, "StackLayout", + MF.getFunction().getSubprogram(), + &MF.front()); + Rem << ("\nFunction: " + MF.getName()).str(); + emitStackFrameLayoutRemarks(MF, Rem); + getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE().emit(Rem); + return false; + } + + std::string getTypeString(SlotType Ty) { + switch (Ty) { + case SlotType::Spill: + return "Spill"; + case SlotType::StackProtector: + return "Protector"; + case SlotType::Variable: + return "Variable"; + default: + llvm_unreachable("bad slot type for stack layout"); + } + } + + void emitStackSlotRemark(const MachineFunction &MF, const SlotData &D, + MachineOptimizationRemarkAnalysis &Rem) { + // To make it easy to understand the stack layout from the CLI, we want to + // print each slot like the following: + // + // Offset: [SP+8], Type: Spill, Align: 8, Size: 16 + // foo @ /path/to/file.c:25 + // bar @ /path/to/file.c:35 + // + // Which prints the size, alignment, and offset from the SP at function + // entry. + // + // But we also want the machine readable remarks data to be nicely + // organized. So we print some additional data as strings for the CLI + // output, but maintain more structured data for the YAML. + // + // For example we store the Offset in YAML as: + // ... + // - Offset: -8 + // + // But we print it to the CLI as + // Offset: [SP-8] + + // Negative offsets will print a leading `-`, so only add `+` + std::string Prefix = + formatv("\nOffset: [SP{0}", (D.Offset < 0) ? 
"" : "+").str(); + Rem << Prefix << ore::NV("Offset", D.Offset) + << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy)) + << ", Align: " << ore::NV("Align", D.Align) + << ", Size: " << ore::NV("Size", D.Size); + } + + void emitSourceLocRemark(const MachineFunction &MF, const DILocalVariable *N, + MachineOptimizationRemarkAnalysis &Rem) { + std::string Loc = + formatv("{0} @ {1}:{2}", N->getName(), N->getFilename(), N->getLine()) + .str(); + Rem << "\n " << ore::NV("DataLoc", Loc); + } + + void emitStackFrameLayoutRemarks(MachineFunction &MF, + MachineOptimizationRemarkAnalysis &Rem) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.hasStackObjects()) + return; + + // ValOffset is the offset to the local area from the SP at function entry. + // To display the true offset from SP, we need to subtract ValOffset from + // MFI's ObjectOffset. + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); + const int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); + + LLVM_DEBUG(dbgs() << "getStackProtectorIndex ==" + << MFI.getStackProtectorIndex() << "\n"); + + std::vector<SlotData> SlotInfo; + + const unsigned int NumObj = MFI.getNumObjects(); + SlotInfo.reserve(NumObj); + // initialize slot info + for (int Idx = MFI.getObjectIndexBegin(), EndIdx = MFI.getObjectIndexEnd(); + Idx != EndIdx; ++Idx) { + if (MFI.isDeadObjectIndex(Idx)) + continue; + SlotInfo.emplace_back(MFI, ValOffset, Idx); + } + + // sort the ordering, to match the actual layout in memory + llvm::sort(SlotInfo); + + SlotDbgMap SlotMap = genSlotDbgMapping(MF); + + for (const SlotData &Info : SlotInfo) { + emitStackSlotRemark(MF, Info, Rem); + for (const DILocalVariable *N : SlotMap[Info.Slot]) + emitSourceLocRemark(MF, N, Rem); + } + } + + // We need to generate a mapping of slots to the values that are stored to + // them. This information is lost by the time we need to print out the frame, + // so we reconstruct it here by walking the CFG, and generating the mapping. + SlotDbgMap genSlotDbgMapping(MachineFunction &MF) { + SlotDbgMap SlotDebugMap; + + // add variables to the map + for (MachineFunction::VariableDbgInfo &DI : MF.getVariableDbgInfo()) + SlotDebugMap[DI.Slot].insert(DI.Var); + + // Then add all the spills that have debug data + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineMemOperand *MO : MI.memoperands()) { + if (!MO->isStore()) + continue; + auto *FI = dyn_cast_or_null<FixedStackPseudoSourceValue>( + MO->getPseudoValue()); + if (!FI) + continue; + int FrameIdx = FI->getFrameIndex(); + SmallVector<MachineInstr *> Dbg; + MI.collectDebugValues(Dbg); + + for (MachineInstr *MI : Dbg) + SlotDebugMap[FrameIdx].insert(MI->getDebugVariable()); + } + } + } + + return SlotDebugMap; + } +}; + +char StackFrameLayoutAnalysisPass::ID = 0; +} // namespace + +char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisPass::ID; +INITIALIZE_PASS(StackFrameLayoutAnalysisPass, "stack-frame-layout", + "Stack Frame Layout", false, false) + +namespace llvm { +/// Returns a newly-created StackFrameLayout pass. 
+MachineFunctionPass *createStackFrameLayoutAnalysisPass() { + return new StackFrameLayoutAnalysisPass(); +} + +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp index ccaff862fa3f..bb7a51e49edb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp @@ -146,6 +146,23 @@ unsigned StatepointOpers::getGCPointerMap( return GCMapSize; } +bool StatepointOpers::isFoldableReg(Register Reg) const { + unsigned FoldableAreaStart = getVarIdx(); + for (const MachineOperand &MO : MI->uses()) { + if (MI->getOperandNo(&MO) >= FoldableAreaStart) + break; + if (MO.isReg() && MO.getReg() == Reg) + return false; + } + return true; +} + +bool StatepointOpers::isFoldableReg(const MachineInstr *MI, Register Reg) { + if (MI->getOpcode() != TargetOpcode::STATEPOINT) + return false; + return StatepointOpers(MI).isFoldableReg(Reg); +} + StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { if (StackMapVersion != 3) llvm_unreachable("Unsupported stackmap version!"); @@ -240,7 +257,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, return ++MOI; } - assert(Register::isPhysicalRegister(MOI->getReg()) && + assert(MOI->getReg().isPhysical() && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); @@ -688,7 +705,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { } // Emit alignment to 8 byte. - OS.emitValueToAlignment(8); + OS.emitValueToAlignment(Align(8)); // Num live-out registers and padding to align to 4 byte. OS.emitInt16(0); @@ -700,7 +717,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { OS.emitIntValue(LO.Size, 1); } // Emit alignment to 8 byte. - OS.emitValueToAlignment(8); + OS.emitValueToAlignment(Align(8)); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp index 510a8e3e4ba2..46685f7b8208 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp @@ -46,6 +46,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <optional> #include <utility> using namespace llvm; @@ -58,10 +60,12 @@ STATISTIC(NumAddrTaken, "Number of local variables that have their address" static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); +static cl::opt<bool> DisableCheckNoReturn("disable-check-noreturn-call", + cl::init(false), cl::Hidden); char StackProtector::ID = 0; -StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) { +StackProtector::StackProtector() : FunctionPass(ID) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } @@ -82,20 +86,16 @@ void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const { bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); Trip = TM->getTargetTriple(); TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); HasPrologue = false; HasIRCheck = false; - Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size"); - if (Attr.isStringAttribute() && - Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) - return false; // Invalid integer string - + SSPBufferSize = Fn.getFnAttributeAsParsedInteger( + "stack-protector-buffer-size", DefaultSSPBufferSize); if (!RequiresStackProtector()) return false; @@ -108,7 +108,14 @@ bool StackProtector::runOnFunction(Function &Fn) { } ++NumFunProtected; - return InsertStackProtectors(); + bool Changed = InsertStackProtectors(); +#ifdef EXPENSIVE_CHECKS + assert((!DTU || + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)) && + "Failed to maintain validity of domtree!"); +#endif + DTU.reset(); + return Changed; } /// \param [out] IsLarge is set to true if a protectable array is found and @@ -166,7 +173,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, const auto *I = cast<Instruction>(U); // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. - Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I); + std::optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I); if (MemLoc && MemLoc->Size.hasValue() && !TypeSize::isKnownGE(AllocSize, TypeSize::getFixed(MemLoc->Size.getValue()))) @@ -414,11 +421,11 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M, /// /// Returns true if the platform/triple supports the stackprotectorcreate pseudo /// node. -static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, +static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc, const TargetLoweringBase *TLI, AllocaInst *&AI) { bool SupportsSelectionDAGSP = false; IRBuilder<> B(&F->getEntryBlock().front()); - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext()); AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP); @@ -441,16 +448,33 @@ bool StackProtector::InsertStackProtectors() { TLI->useStackGuardXorFP() || (EnableSelectionDAGSP && !TM->Options.EnableFastISel); AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. + BasicBlock *FailBB = nullptr; for (BasicBlock &BB : llvm::make_early_inc_range(*F)) { - ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()); - if (!RI) + // This is stack protector auto generated check BB, skip it. + if (&BB == FailBB) + continue; + Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator()); + if (!CheckLoc && !DisableCheckNoReturn) { + for (auto &Inst : BB) { + auto *CB = dyn_cast<CallBase>(&Inst); + if (!CB) + continue; + if (!CB->doesNotReturn()) + continue; + // Do stack check before non-return calls (e.g: __cxa_throw) + CheckLoc = CB; + break; + } + } + + if (!CheckLoc) continue; // Generate prologue instrumentation if not already generated. if (!HasPrologue) { HasPrologue = true; - SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI); + SupportsSelectionDAGSP &= CreatePrologue(F, M, CheckLoc, TLI, AI); } // SelectionDAG based code generation. Nothing else needs to be done here. 
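The hunk above also changes where the guard check is anchored: a returning block keeps using its terminator, and otherwise, unless -disable-check-noreturn-call is set, the first call that cannot return is used, so the slot is verified before e.g. __cxa_throw. A toy model of that selection, with a stand-in Inst type rather than real IR:

#include <cassert>
#include <vector>

struct Inst {                 // stand-in for the real instruction classes
  bool IsReturn = false;
  bool IsNoReturnCall = false;
};

static const Inst *findCheckLoc(const std::vector<Inst> &BB,
                                bool DisableCheckNoReturn) {
  if (!BB.empty() && BB.back().IsReturn)
    return &BB.back();                  // usual case: check before the return
  if (DisableCheckNoReturn)
    return nullptr;
  for (const Inst &I : BB)
    if (I.IsNoReturnCall)
      return &I;                        // check before the no-return call
  return nullptr;
}

int main() {
  std::vector<Inst> Throwing = {{}, {false, true}, {}};
  assert(findCheckLoc(Throwing, false) == &Throwing[1]);
  assert(findCheckLoc(Throwing, true) == nullptr);
}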
@@ -471,18 +495,17 @@ bool StackProtector::InsertStackProtectors() { // instrumentation has already been generated. HasIRCheck = true; - // If we're instrumenting a block with a musttail call, the check has to be + // If we're instrumenting a block with a tail call, the check has to be // inserted before the call rather than between it and the return. The - // verifier guarantees that a musttail call is either directly before the + // verifier guarantees that a tail call is either directly before the // return or with a single correct bitcast of the return value in between so // we don't need to worry about many situations here. - Instruction *CheckLoc = RI; - Instruction *Prev = RI->getPrevNonDebugInstruction(); - if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall()) + Instruction *Prev = CheckLoc->getPrevNonDebugInstruction(); + if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall()) CheckLoc = Prev; else if (Prev) { Prev = Prev->getPrevNonDebugInstruction(); - if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall()) + if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall()) CheckLoc = Prev; } @@ -514,8 +537,8 @@ bool StackProtector::InsertStackProtectors() { // ... // %1 = <stack guard> // %2 = load StackGuardSlot - // %3 = cmp i1 %1, %2 - // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // %3 = icmp ne i1 %1, %2 + // br i1 %3, label %CallStackCheckFailBlk, label %SP_return // // SP_return: // ret ... @@ -527,38 +550,33 @@ bool StackProtector::InsertStackProtectors() { // Create the FailBB. We duplicate the BB every time since the MI tail // merge pass will merge together all of the various BB into one including // fail BB generated by the stack protector pseudo instruction. - BasicBlock *FailBB = CreateFailBB(); - - // Split the basic block before the return instruction. - BasicBlock *NewBB = - BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return"); - - // Update the dominator tree if we need to. - if (DT && DT->isReachableFromEntry(&BB)) { - DT->addNewBlock(NewBB, &BB); - DT->addNewBlock(FailBB, &BB); - } - - // Remove default branch instruction to the new BB. - BB.getTerminator()->eraseFromParent(); + if (!FailBB) + FailBB = CreateFailBB(); - // Move the newly created basic block to the point right after the old - // basic block so that it's in the "fall through" position. - NewBB->moveAfter(&BB); - - // Generate the stack protector instructions in the old basic block. - IRBuilder<> B(&BB); + IRBuilder<> B(CheckLoc); Value *Guard = getStackGuard(TLI, M, B); LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true); - Value *Cmp = B.CreateICmpEQ(Guard, LI2); + auto *Cmp = cast<ICmpInst>(B.CreateICmpNE(Guard, LI2)); auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true); auto FailureProb = BranchProbabilityInfo::getBranchProbStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) - .createBranchWeights(SuccessProb.getNumerator(), - FailureProb.getNumerator()); - B.CreateCondBr(Cmp, NewBB, FailBB, Weights); + .createBranchWeights(FailureProb.getNumerator(), + SuccessProb.getNumerator()); + + SplitBlockAndInsertIfThen(Cmp, CheckLoc, + /*Unreachable=*/false, Weights, + DTU ? 
&*DTU : nullptr, + /*LI=*/nullptr, /*ThenBlock=*/FailBB); + + auto *BI = cast<BranchInst>(Cmp->getParent()->getTerminator()); + BasicBlock *NewBB = BI->getSuccessor(1); + NewBB->setName("SP_return"); + NewBB->moveAfter(&BB); + + Cmp->setPredicate(Cmp->getInversePredicate()); + BI->swapSuccessors(); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 2282d53e8ffd..83a7063de112 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -202,11 +202,10 @@ void SwiftErrorValueTracking::propagateVRegs() { // downward defs. bool needPHI = VRegs.size() >= 1 && - llvm::find_if( + llvm::any_of( VRegs, [&](const std::pair<const MachineBasicBlock *, Register> &V) - -> bool { return V.second != VRegs[0].second; }) != - VRegs.end(); + -> bool { return V.second != VRegs[0].second; }); // If there is no upwards exposed used and we don't need a phi just // forward the swifterror vreg from the predecessor(s). diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index 18507b8fa84f..865add28f781 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -370,8 +370,10 @@ void TailDuplicator::processPHI( // Remove PredBB from the PHI node. MI->removeOperand(SrcOpIdx + 1); MI->removeOperand(SrcOpIdx); - if (MI->getNumOperands() == 1) + if (MI->getNumOperands() == 1 && !TailBB->hasAddressTaken()) MI->eraseFromParent(); + else if (MI->getNumOperands() == 1) + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); } /// Duplicate a TailBB instruction to PredBB and update @@ -395,7 +397,7 @@ void TailDuplicator::duplicateInstruction( if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isDef()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); @@ -716,8 +718,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { bool TailDuplicator::duplicateSimpleBB( MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, - const DenseSet<Register> &UsedByPhi, - SmallVectorImpl<MachineInstr *> &Copies) { + const DenseSet<Register> &UsedByPhi) { SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors()); @@ -799,6 +800,15 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, return false; if (!PredCond.empty()) return false; + // FIXME: This is overly conservative; it may be ok to relax this in the + // future under more specific conditions. If TailBB is an INLINEASM_BR + // indirect target, we need to see if the edge from PredBB to TailBB is from + // an INLINEASM_BR in PredBB, and then also if that edge was from the + // indirect target list, fallthrough/default target, or potentially both. If + // it's both, TailDuplicator::tailDuplicate will remove the edge, corrupting + // the successor list in PredBB and predecessor list in TailBB. 
+ if (TailBB->isInlineAsmBrIndirectTarget()) + return false; return true; } @@ -826,7 +836,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) - return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index 4116231c005f..0f6cf11ca9d1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -48,8 +49,8 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, if (OpNum >= MCID.getNumOperands()) return nullptr; - short RegClass = MCID.OpInfo[OpNum].RegClass; - if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) + short RegClass = MCID.operands()[OpNum].RegClass; + if (MCID.operands()[OpNum].isLookupPtrRegClass()) return TRI->getPointerRegClass(MF, RegClass); // Instructions like INSERT_SUBREG do not have fixed register classes. @@ -193,12 +194,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); // Avoid calling isRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. - bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1) - ? MI.getOperand(Idx1).isRenamable() - : false; - bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2) - ? MI.getOperand(Idx2).isRenamable() - : false; + bool Reg1IsRenamable = + Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false; + bool Reg2IsRenamable = + Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false; // If destination is tied to either of the commuted source register, then // it must be updated. if (HasDef && Reg0 == Reg1 && @@ -238,9 +237,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); // Avoid calling setIsRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. 
- if (Register::isPhysicalRegister(Reg1)) + if (Reg1.isPhysical()) CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable); - if (Register::isPhysicalRegister(Reg2)) + if (Reg2.isPhysical()) CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable); return CommutedMI; } @@ -338,7 +337,7 @@ bool TargetInstrInfo::PredicateInstruction( return false; for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) { - if (MCID.OpInfo[i].isPredicate()) { + if (MCID.operands()[i].isPredicate()) { MachineOperand &MO = MI.getOperand(i); if (MO.isReg()) { MO.setReg(Pred[j].getReg()); @@ -455,12 +454,12 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, Register FoldReg = FoldOp.getReg(); Register LiveReg = LiveOp.getReg(); - assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs"); + assert(FoldReg.isVirtual() && "Cannot fold physregs"); const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); - if (Register::isPhysicalRegister(LiveOp.getReg())) + if (LiveOp.getReg().isPhysical()) return RC->contains(LiveOp.getReg()) ? RC : nullptr; if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) @@ -641,9 +640,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, MachineBasicBlock::iterator Pos = MI; if (Flags == MachineMemOperand::MOStore) - storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI, + Register()); else - loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register()); return &*--Pos; } @@ -705,13 +705,18 @@ bool TargetInstrInfo::hasReassociableOperands( // reassociate. MachineInstr *MI1 = nullptr; MachineInstr *MI2 = nullptr; - if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) + if (Op1.isReg() && Op1.getReg().isVirtual()) MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg())) + if (Op2.isReg() && Op2.getReg().isVirtual()) MI2 = MRI.getUniqueVRegDef(Op2.getReg()); - // And they need to be in the trace (otherwise, they won't have a depth). - return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB; + // And at least one operand must be defined in MBB. + return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB); +} + +bool TargetInstrInfo::areOpcodesEqualOrInverse(unsigned Opcode1, + unsigned Opcode2) const { + return Opcode1 == Opcode2 || getInverseOpcode(Opcode1) == Opcode2; } bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, @@ -720,33 +725,39 @@ bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); + unsigned Opcode = Inst.getOpcode(); - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + // If only one operand has the same or inverse opcode and it's the second + // source operand, the operands must be commuted. + Commuted = !areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) && + areOpcodesEqualOrInverse(Opcode, MI2->getOpcode()); if (Commuted) std::swap(MI1, MI2); // 1. 
The previous instruction must be the same type as Inst. - // 2. The previous instruction must also be associative/commutative (this can - // be different even for instructions with the same opcode if traits like - // fast-math-flags are included). + // 2. The previous instruction must also be associative/commutative or be the + // inverse of such an operation (this can be different even for + // instructions with the same opcode if traits like fast-math-flags are + // included). // 3. The previous instruction must have virtual register definitions for its // operands in the same basic block as Inst. // 4. The previous instruction's result must only be used by Inst. - return MI1->getOpcode() == AssocOpcode && isAssociativeAndCommutative(*MI1) && + return areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) && + (isAssociativeAndCommutative(*MI1) || + isAssociativeAndCommutative(*MI1, /* Invert */ true)) && hasReassociableOperands(*MI1, MBB) && MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); } -// 1. The operation must be associative and commutative. +// 1. The operation must be associative and commutative or be the inverse of +// such an operation. // 2. The instruction must have virtual register definitions for its // operands in the same basic block. // 3. The instruction must have a reassociable sibling. bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const { - return isAssociativeAndCommutative(Inst) && + return (isAssociativeAndCommutative(Inst) || + isAssociativeAndCommutative(Inst, /* Invert */ true)) && hasReassociableOperands(Inst, Inst.getParent()) && hasReassociableSibling(Inst, Commuted); } @@ -800,6 +811,111 @@ TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { return false; } +std::pair<unsigned, unsigned> +TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern, + const MachineInstr &Root, + const MachineInstr &Prev) const { + bool AssocCommutRoot = isAssociativeAndCommutative(Root); + bool AssocCommutPrev = isAssociativeAndCommutative(Prev); + + // Early exit if both opcodes are associative and commutative. It's a trivial + // reassociation when we only change operands order. In this case opcodes are + // not required to have inverse versions. + if (AssocCommutRoot && AssocCommutPrev) { + assert(Root.getOpcode() == Prev.getOpcode() && "Expected to be equal"); + return std::make_pair(Root.getOpcode(), Root.getOpcode()); + } + + // At least one instruction is not associative or commutative. + // Since we have matched one of the reassociation patterns, we expect that the + // instructions' opcodes are equal or one of them is the inversion of the + // other. 
+ assert(areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()) && + "Incorrectly matched pattern"); + unsigned AssocCommutOpcode = Root.getOpcode(); + unsigned InverseOpcode = *getInverseOpcode(Root.getOpcode()); + if (!AssocCommutRoot) + std::swap(AssocCommutOpcode, InverseOpcode); + + // The transformation rule (`+` is any associative and commutative binary + // operation, `-` is the inverse): + // REASSOC_AX_BY: + // (A + X) + Y => A + (X + Y) + // (A + X) - Y => A + (X - Y) + // (A - X) + Y => A - (X - Y) + // (A - X) - Y => A - (X + Y) + // REASSOC_XA_BY: + // (X + A) + Y => (X + Y) + A + // (X + A) - Y => (X - Y) + A + // (X - A) + Y => (X + Y) - A + // (X - A) - Y => (X - Y) - A + // REASSOC_AX_YB: + // Y + (A + X) => (Y + X) + A + // Y - (A + X) => (Y - X) - A + // Y + (A - X) => (Y - X) + A + // Y - (A - X) => (Y + X) - A + // REASSOC_XA_YB: + // Y + (X + A) => (Y + X) + A + // Y - (X + A) => (Y - X) - A + // Y + (X - A) => (Y + X) - A + // Y - (X - A) => (Y - X) + A + switch (Pattern) { + default: + llvm_unreachable("Unexpected pattern"); + case MachineCombinerPattern::REASSOC_AX_BY: + if (!AssocCommutRoot && AssocCommutPrev) + return {AssocCommutOpcode, InverseOpcode}; + if (AssocCommutRoot && !AssocCommutPrev) + return {InverseOpcode, InverseOpcode}; + if (!AssocCommutRoot && !AssocCommutPrev) + return {InverseOpcode, AssocCommutOpcode}; + break; + case MachineCombinerPattern::REASSOC_XA_BY: + if (!AssocCommutRoot && AssocCommutPrev) + return {AssocCommutOpcode, InverseOpcode}; + if (AssocCommutRoot && !AssocCommutPrev) + return {InverseOpcode, AssocCommutOpcode}; + if (!AssocCommutRoot && !AssocCommutPrev) + return {InverseOpcode, InverseOpcode}; + break; + case MachineCombinerPattern::REASSOC_AX_YB: + if (!AssocCommutRoot && AssocCommutPrev) + return {InverseOpcode, InverseOpcode}; + if (AssocCommutRoot && !AssocCommutPrev) + return {AssocCommutOpcode, InverseOpcode}; + if (!AssocCommutRoot && !AssocCommutPrev) + return {InverseOpcode, AssocCommutOpcode}; + break; + case MachineCombinerPattern::REASSOC_XA_YB: + if (!AssocCommutRoot && AssocCommutPrev) + return {InverseOpcode, InverseOpcode}; + if (AssocCommutRoot && !AssocCommutPrev) + return {InverseOpcode, AssocCommutOpcode}; + if (!AssocCommutRoot && !AssocCommutPrev) + return {AssocCommutOpcode, InverseOpcode}; + break; + } + llvm_unreachable("Unhandled combination"); +} + +// Return a pair of boolean flags showing if the new root and new prev operands +// must be swapped. See visual example of the rule in +// TargetInstrInfo::getReassociationOpcodes. +static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) { + switch (Pattern) { + default: + llvm_unreachable("Unexpected pattern"); + case MachineCombinerPattern::REASSOC_AX_BY: + return {false, false}; + case MachineCombinerPattern::REASSOC_XA_BY: + return {true, false}; + case MachineCombinerPattern::REASSOC_AX_YB: + return {true, true}; + case MachineCombinerPattern::REASSOC_XA_YB: + return {true, true}; + } +} + /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). 
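As a concrete instance of the rules above: for REASSOC_AX_BY with an add/sub pair, Prev is A + X and Root is (A + X) - Y, so AssocCommutPrev is true, AssocCommutRoot is false, getReassociationOpcodes returns {add, sub}, and mustSwapOperands returns {false, false}; the rewrite therefore emits NewVR = X - Y with the inverse opcode followed by a new root A + NewVR, matching the listed rule (A + X) - Y => A + (X - Y), e.g. (10 + 7) - 3 = 10 + (7 - 3) = 14.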
void TargetInstrInfo::reassociateOps( @@ -845,15 +961,15 @@ void TargetInstrInfo::reassociateOps( Register RegY = OpY.getReg(); Register RegC = OpC.getReg(); - if (Register::isVirtualRegister(RegA)) + if (RegA.isVirtual()) MRI.constrainRegClass(RegA, RC); - if (Register::isVirtualRegister(RegB)) + if (RegB.isVirtual()) MRI.constrainRegClass(RegB, RC); - if (Register::isVirtualRegister(RegX)) + if (RegX.isVirtual()) MRI.constrainRegClass(RegX, RC); - if (Register::isVirtualRegister(RegY)) + if (RegY.isVirtual()) MRI.constrainRegClass(RegY, RC); - if (Register::isVirtualRegister(RegC)) + if (RegC.isVirtual()) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of @@ -862,21 +978,35 @@ void TargetInstrInfo::reassociateOps( Register NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - unsigned Opcode = Root.getOpcode(); + auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev); bool KillA = OpA.isKill(); bool KillX = OpX.isKill(); bool KillY = OpY.isKill(); + bool KillNewVR = true; + + auto [SwapRootOperands, SwapPrevOperands] = mustSwapOperands(Pattern); + + if (SwapPrevOperands) { + std::swap(RegX, RegY); + std::swap(KillX, KillY); + } // Create new instructions for insertion. MachineInstrBuilder MIB1 = - BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR) .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)) .setMIFlags(Prev.getFlags()); + + if (SwapRootOperands) { + std::swap(RegA, NewVR); + std::swap(KillA, KillNewVR); + } + MachineInstrBuilder MIB2 = - BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC) .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)) + .addReg(NewVR, getKillRegState(KillNewVR)) .setMIFlags(Root.getFlags()); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); @@ -910,6 +1040,10 @@ void TargetInstrInfo::genAlternativeCodeSequence( break; } + // Don't reassociate if Prev and Root are in different blocks. + if (Prev->getParent() != Root.getParent()) + return; + assert(Prev && "Unknown pattern for machine combiner"); reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); @@ -929,7 +1063,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // doesn't read the other parts of the register. Otherwise it is really a // read-modify-write operation on the full virtual register which cannot be // moved safely. - if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() && + if (DefReg.isVirtual() && MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg)) return false; @@ -964,7 +1098,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( continue; // Check for a well-behaved physical register. - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, @@ -1170,7 +1304,7 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, return (DefCycle != -1 && DefCycle <= 1); } -Optional<ParamLoadedValue> +std::optional<ParamLoadedValue> TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { const MachineFunction *MF = MI.getMF(); @@ -1200,7 +1334,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) && "TargetInstrInfo::describeLoadedValue can't describe super- or " "sub-regs for copy instructions"); - return None; + return std::nullopt; } else if (auto RegImm = isAddImmediate(MI, Reg)) { Register SrcReg = RegImm->Reg; Offset = RegImm->Imm; @@ -1218,16 +1352,16 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, // If the address points to "special" memory (e.g. a spill slot), it's // sufficient to check that it isn't aliased by any high-level IR value. if (!PSV || PSV->mayAlias(&MFI)) - return None; + return std::nullopt; const MachineOperand *BaseOp; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) - return None; + return std::nullopt; // FIXME: Scalable offsets are not yet handled in the offset code below. if (OffsetIsScalable) - return None; + return std::nullopt; // TODO: Can currently only handle mem instructions with a single define. // An example from the x86 target: @@ -1236,7 +1370,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, // ... // if (MI.getNumExplicitDefs() != 1) - return None; + return std::nullopt; // TODO: In what way do we need to take Reg into consideration here? @@ -1248,7 +1382,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, return ParamLoadedValue(*BaseOp, Expr); } - return None; + return std::nullopt; } /// Both DefMI and UseMI must be valid. By default, call directly to the @@ -1411,6 +1545,8 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes( const Function &ParentFn = FirstCand.getMF()->getFunction(); if (ParentFn.hasFnAttribute("target-features")) F.addFnAttr(ParentFn.getFnAttribute("target-features")); + if (ParentFn.hasFnAttribute("target-cpu")) + F.addFnAttr(ParentFn.getFnAttribute("target-cpu")); // Set nounwind, so we don't generate eh_frame. if (llvm::all_of(Candidates, [](const outliner::Candidate &C) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9b965109745c..b62374320d75 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -724,6 +724,10 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; + MaxDivRemBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; + + MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; + MinCmpXchgSizeInBits = 0; SupportsUnalignedAtomics = false; @@ -868,6 +872,11 @@ void TargetLoweringBase::initActions() { // Named vector shuffles default to expand. setOperationAction(ISD::VECTOR_SPLICE, VT, Expand); + + // VP_SREM/UREM default to expand. + // TODO: Expand all VP intrinsics. + setOperationAction(ISD::VP_SREM, VT, Expand); + setOperationAction(ISD::VP_UREM, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. 
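Both new limits default to llvm::IntegerType::MAX_INT_BITS, i.e. no integer division/remainder or large fp<->int conversion is considered too wide unless a target lowers the cap. A hedged sketch of how a backend might do that in its TargetLowering constructor follows; MyTargetLowering and the 128-bit figure are illustrative, and the setter names are assumed to mirror the members initialised above:

// Illustrative only: cap the widths this hypothetical backend lowers natively;
// wider integer div/rem and large fp<->int conversions would then be broken up
// by the pre-ISel IR passes (see the createExpandLargeDivRemPass /
// createExpandLargeFpConvertPass additions to TargetPassConfig further down in
// this diff).
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  setMaxDivRemBitWidthSupported(128);         // assumed setter name
  setMaxLargeFPConvertBitWidthSupported(128); // assumed setter name
}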
@@ -950,7 +959,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // If this is a simple type, use the ComputeRegisterProp mechanism. if (VT.isSimple()) { MVT SVT = VT.getSimpleVT(); - assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType)); + assert((unsigned)SVT.SimpleTy < std::size(TransformToType)); MVT NVT = TransformToType[SVT.SimpleTy]; LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); @@ -1342,6 +1351,15 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); } + // Decide how to handle f80. If the target does not have native f80 support, + // expand it to i96 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f80)) { + NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f80] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat); + } + // Decide how to handle f64. If the target does not have native f64 support, // expand it to i64 and we will be generating soft float library calls. if (!isTypeLegal(MVT::f64)) { @@ -1385,7 +1403,7 @@ void TargetLoweringBase::computeRegisterProperties( NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32]; RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32]; TransformToType[MVT::bf16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::bf16, TypePromoteFloat); + ValueTypeActions.setTypeAction(MVT::bf16, TypeSoftPromoteHalf); } // Loop over all of the vector value types to see which need transformations. @@ -1424,7 +1442,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case TypeWidenVector: @@ -1458,7 +1476,7 @@ void TargetLoweringBase::computeRegisterProperties( break; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; case TypeSplitVector: case TypeScalarizeVector: { @@ -1609,7 +1627,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16. TypeSize NewVTSize = NewVT.getSizeInBits(); // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize.getKnownMinSize())) + if (!isPowerOf2_32(NewVTSize.getKnownMinValue())) NewVTSize = NewVTSize.coefficientNextPowerOf2(); return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); } @@ -1709,7 +1727,7 @@ uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty, bool TargetLoweringBase::allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, - Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution // would be to implement this check directly (make this a virtual function). @@ -1719,7 +1737,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { // Assume that an access that meets the ABI-specified alignment is fast. 
if (Fast != nullptr) - *Fast = true; + *Fast = 1; return true; } @@ -1729,7 +1747,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( bool TargetLoweringBase::allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, - const MachineMemOperand &MMO, bool *Fast) const { + const MachineMemOperand &MMO, unsigned *Fast) const { return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); } @@ -1738,7 +1756,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, Flags, Fast); } @@ -1746,7 +1764,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, - bool *Fast) const { + unsigned *Fast) const { return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); } @@ -1754,7 +1772,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, const MachineMemOperand &MMO, - bool *Fast) const { + unsigned *Fast) const { EVT VT = getApproximateEVTForLLT(Ty, DL, Context); return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); @@ -1843,41 +1861,6 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { llvm_unreachable("Unknown instruction type encountered!"); } -std::pair<InstructionCost, MVT> -TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const { - LLVMContext &C = Ty->getContext(); - EVT MTy = getValueType(DL, Ty); - - InstructionCost Cost = 1; - // We keep legalizing the type until we find a legal kind. We assume that - // the only operation that costs anything is the split. After splitting - // we need to handle two types. - while (true) { - LegalizeKind LK = getTypeConversion(C, MTy); - - if (LK.first == TypeScalarizeScalableVector) { - // Ensure we return a sensible simple VT here, since many callers of this - // function require it. - MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64; - return std::make_pair(InstructionCost::getInvalid(), VT); - } - - if (LK.first == TypeLegal) - return std::make_pair(Cost, MTy.getSimpleVT()); - - if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) - Cost *= 2; - - // Do not loop with f128 type. - if (MTy == LK.second) - return std::make_pair(Cost, MTy.getSimpleVT()); - - // Keep legalizing the type. - MTy = LK.second; - } -} - Value * TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, bool UseTLS) const { @@ -2231,13 +2214,41 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT, return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); } +bool TargetLoweringBase::isLoadBitCastBeneficial( + EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, + const MachineMemOperand &MMO) const { + // Single-element vectors are scalarized, so we should generally avoid having + // any memory operations on such types, as they would get scalarized too. 
+ if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() && + BitcastVT.getVectorNumElements() == 1) + return false; + + // Don't do if we could do an indexed load on the original type, but not on + // the new one. + if (!LoadVT.isSimple() || !BitcastVT.isSimple()) + return true; + + MVT LoadMVT = LoadVT.getSimpleVT(); + + // Don't bother doing this if it's just going to be promoted again later, as + // doing so might interfere with other combines. + if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && + getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) + return false; + + unsigned Fast = 0; + return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT, + MMO, &Fast) && + Fast; +} + void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { MF.getRegInfo().freezeReservedRegs(MF); } -MachineMemOperand::Flags -TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI, - const DataLayout &DL) const { +MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags( + const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC, + const TargetLibraryInfo *LibInfo) const { MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; if (LI.isVolatile()) Flags |= MachineMemOperand::MOVolatile; @@ -2248,7 +2259,9 @@ TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI, if (LI.hasMetadata(LLVMContext::MD_invariant_load)) Flags |= MachineMemOperand::MOInvariant; - if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL)) + if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(), + LI.getAlign(), DL, &LI, AC, + /*DT=*/nullptr, LibInfo)) Flags |= MachineMemOperand::MODereferenceable; Flags |= getTargetMMOFlags(LI); @@ -2325,7 +2338,7 @@ bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, auto maxUses = [](unsigned RematCost) { // A cost of 1 means remats are basically free. if (RematCost == 1) - return UINT_MAX; + return std::numeric_limits<unsigned>::max(); if (RematCost == 2) return 2U; @@ -2335,18 +2348,6 @@ bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, llvm_unreachable("Unexpected remat cost"); }; - // Helper to walk through uses and terminate if we've reached a limit. Saves - // us spending time traversing uses if all we want to know is if it's >= min. - auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { - unsigned NumUses = 0; - auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end(); - for (; UI != UE && NumUses < MaxUses; ++UI) { - NumUses++; - } - // If we haven't reached the end yet then there are more than MaxUses users. - return UI == UE; - }; - switch (MI.getOpcode()) { default: return false; @@ -2363,8 +2364,7 @@ bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, unsigned MaxUses = maxUses(RematCost); if (MaxUses == UINT_MAX) return true; // Remats are "free" so always localize. 
- bool B = isUsesAtMost(Reg, MaxUses); - return B; + return MRI.hasAtMostUserInstrs(Reg, MaxUses); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 2badbe34ae6a..e760564779c2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -58,6 +58,7 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" @@ -127,7 +128,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM) break; // Fallthrough if not using EHABI - LLVM_FALLTHROUGH; + [[fallthrough]]; case Triple::ppc: case Triple::ppcle: case Triple::x86: @@ -289,6 +290,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TTypeEncoding = dwarf::DW_EH_PE_absptr; } break; + case Triple::loongarch32: + case Triple::loongarch64: + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + break; default: break; } @@ -358,6 +367,31 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, } } + if (NamedMDNode *LLVMStats = M.getNamedMetadata("llvm.stats")) { + // Emit the metadata for llvm statistics into .llvm_stats section, which is + // formatted as a list of key/value pair, the value is base64 encoded. + auto *S = C.getObjectFileInfo()->getLLVMStatsSection(); + Streamer.switchSection(S); + for (const auto *Operand : LLVMStats->operands()) { + const auto *MD = cast<MDNode>(Operand); + assert(MD->getNumOperands() % 2 == 0 && + ("Operand num should be even for a list of key/value pair")); + for (size_t I = 0; I < MD->getNumOperands(); I += 2) { + // Encode the key string size. + auto *Key = cast<MDString>(MD->getOperand(I)); + Streamer.emitULEB128IntValue(Key->getString().size()); + Streamer.emitBytes(Key->getString()); + // Encode the value into a Base64 string. + std::string Value = encodeBase64( + Twine(mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1)) + ->getZExtValue()) + .str()); + Streamer.emitULEB128IntValue(Value.size()); + Streamer.emitBytes(Value); + } + } + } + unsigned Version = 0; unsigned Flags = 0; StringRef Section; @@ -400,7 +434,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.switchSection(Sec); - Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value()); + Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0)); Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -636,7 +670,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, bool HasPrefix = false; if (const auto *F = dyn_cast<Function>(GO)) { - if (Optional<StringRef> Prefix = F->getSectionPrefix()) { + if (std::optional<StringRef> Prefix = F->getSectionPrefix()) { raw_svector_ostream(Name) << '.' 
<< *Prefix; HasPrefix = true; } @@ -1686,7 +1720,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( StringRef COMDATSymName = Sym->getName(); if (const auto *F = dyn_cast<Function>(GO)) - if (Optional<StringRef> Prefix = F->getSectionPrefix()) + if (std::optional<StringRef> Prefix = F->getSectionPrefix()) raw_svector_ostream(Name) << '$' << *Prefix; // Append "$symbol" to the section name *before* IR-level mangling is @@ -1889,11 +1923,24 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really // low priorities need to sort before 'L', since the CRT uses that - // internally, so we use ".CRT$XCA00001" for them. + // internally, so we use ".CRT$XCA00001" for them. We have a contract with + // the frontend that "init_seg(compiler)" corresponds to priority 200 and + // "init_seg(lib)" corresponds to priority 400, and those respectively use + // 'C' and 'L' without the priority suffix. Priorities between 200 and 400 + // use 'C' with the priority as a suffix. SmallString<24> Name; + char LastLetter = 'T'; + bool AddPrioritySuffix = Priority != 200 && Priority != 400; + if (Priority < 200) + LastLetter = 'A'; + else if (Priority < 400) + LastLetter = 'C'; + else if (Priority == 400) + LastLetter = 'L'; raw_svector_ostream OS(Name); - OS << ".CRT$X" << (IsCtor ? "C" : "T") << - (Priority < 200 ? 'A' : 'T') << format("%05u", Priority); + OS << ".CRT$X" << (IsCtor ? "C" : "T") << LastLetter; + if (AddPrioritySuffix) + OS << format("%05u", Priority); MCSectionCOFF *Sec = Ctx.getCOFFSection( Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); @@ -2245,16 +2292,16 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, // function entry point. We choose to always return a function descriptor // here. if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) { + if (GO->isDeclarationForLinker()) + return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM)) + ->getQualNameSymbol(); + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->hasAttribute("toc-data")) return cast<MCSectionXCOFF>( SectionForGlobal(GVar, SectionKind::getData(), TM)) ->getQualNameSymbol(); - if (GO->isDeclarationForLinker()) - return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM)) - ->getQualNameSymbol(); - SectionKind GOKind = getKindForGlobal(GO, TM); if (GOKind.isText()) return cast<MCSectionXCOFF>( @@ -2313,6 +2360,10 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( if (GO->isThreadLocal()) SMC = XCOFF::XMC_UL; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO)) + if (GVar->hasAttribute("toc-data")) + SMC = XCOFF::XMC_TD; + // Externals go into a csect of type ER. return getContext().getXCOFFSection( Name, SectionKind::getMetadata(), @@ -2469,6 +2520,13 @@ void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, PersonalityEncoding = 0; LSDAEncoding = 0; CallSiteEncoding = dwarf::DW_EH_PE_udata4; + + // AIX debug for thread local location is not ready. And for integrated as + // mode, the relocatable address for the thread local variable will cause + // linker error. So disable the location attribute generation for thread local + // variables for now. + // FIXME: when TLS debug on AIX is ready, remove this setting. 
+ SupportDebugThreadLocalLocation = false; } MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index 0bd229f4fc68..3127328c363e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -16,8 +16,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAndersAliasAnalysis.h" -#include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -49,6 +47,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include <cassert> +#include <optional> #include <string> using namespace llvm; @@ -208,18 +207,6 @@ static cl::opt<bool> MISchedPostRA( static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); -// Experimental option to use CFL-AA in codegen -static cl::opt<CFLAAType> UseCFLAA( - "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden, - cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"), - cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), - clEnumValN(CFLAAType::Steensgaard, "steens", - "Enable unification-based CFL-AA"), - clEnumValN(CFLAAType::Andersen, "anders", - "Enable inclusion-based CFL-AA"), - clEnumValN(CFLAAType::Both, "both", - "Enable both variants of CFL-AA"))); - /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. 
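The deleted block above retires the experimental escape hatch that let codegen opt into CFL alias analysis, previously selectable per invocation with e.g. llc -use-cfl-aa-in-codegen=steens (or anders, or both). With the CFL-AA passes gone, addIRPasses only sets up the default arrangement described in the surviving comment below, where TypeBasedAliasAnalysis is registered before BasicAliasAnalysis so that BasicAliasAnalysis wins if the two disagree.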
@@ -339,8 +326,8 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, static std::string getFSProfileFile(const TargetMachine *TM) { if (!FSProfileFile.empty()) return FSProfileFile.getValue(); - const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); - if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption(); + if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse) return std::string(); return PGOOpt->ProfileFile; } @@ -350,8 +337,8 @@ static std::string getFSProfileFile(const TargetMachine *TM) { static std::string getFSRemappingFile(const TargetMachine *TM) { if (!FSRemappingFile.empty()) return FSRemappingFile.getValue(); - const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); - if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption(); + if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse) return std::string(); return PGOOpt->ProfileRemappingFile; } @@ -492,7 +479,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(EnableImplicitNullChecks) SET_BOOLEAN_OPTION(EnableMachineOutliner) SET_BOOLEAN_OPTION(MISchedPostRA) - SET_BOOLEAN_OPTION(UseCFLAA) SET_BOOLEAN_OPTION(DisableMergeICmps) SET_BOOLEAN_OPTION(DisableLSR) SET_BOOLEAN_OPTION(DisableConstantHoisting) @@ -548,7 +534,7 @@ static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC, PIC.registerShouldRunOptionalPassCallback( [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(), - EnableNext = Optional<bool>(), StartBeforeCount = 0u, + EnableNext = std::optional<bool>(), StartBeforeCount = 0u, StartAfterCount = 0u, StopBeforeCount = 0u, StopAfterCount = 0u](StringRef P, Any) mutable { bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore); @@ -857,21 +843,6 @@ void TargetPassConfig::addIRPasses() { addPass(createVerifierPass()); if (getOptLevel() != CodeGenOpt::None) { - switch (UseCFLAA) { - case CFLAAType::Steensgaard: - addPass(createCFLSteensAAWrapperPass()); - break; - case CFLAAType::Andersen: - addPass(createCFLAndersAAWrapperPass()); - break; - case CFLAAType::Both: - addPass(createCFLAndersAAWrapperPass()); - addPass(createCFLSteensAAWrapperPass()); - break; - default: - break; - } - // Basic AliasAnalysis support. // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help @@ -904,7 +875,7 @@ void TargetPassConfig::addIRPasses() { addPass(&ShadowStackGCLoweringID); addPass(createLowerConstantIntrinsicsPass()); - // For MachO, lower @llvm.global_dtors into @llvm_global_ctors with + // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func. if (TM->getTargetTriple().isOSBinFormatMachO() && TM->Options.LowerGlobalDtorsViaCxaAtExit) @@ -960,7 +931,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. addPass(createSjLjEHPreparePass(TM)); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: case ExceptionHandling::AIX: @@ -1058,13 +1029,13 @@ bool TargetPassConfig::addCoreISelPasses() { // pass manager into two. GlobalISel with the fallback path disabled // and -run-pass seem to be unaffected. The majority of GlobalISel // testing uses -run-pass so this probably isn't too bad. 
- SaveAndRestore<bool> SavedDebugifyIsSafe(DebugifyIsSafe); + SaveAndRestore SavedDebugifyIsSafe(DebugifyIsSafe); if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled()) DebugifyIsSafe = false; // Add instruction selector passes. if (Selector == SelectorType::GlobalISel) { - SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true); + SaveAndRestore SavedAddingMachinePasses(AddingMachinePasses, true); if (addIRTranslator()) return true; @@ -1113,6 +1084,8 @@ bool TargetPassConfig::addISelPasses() { addPass(createPreISelIntrinsicLoweringPass()); PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + addPass(createExpandLargeDivRemPass()); + addPass(createExpandLargeFpConvertPass()); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); @@ -1267,6 +1240,7 @@ void TargetPassConfig::addMachinePasses() { addPass(&StackMapLivenessID); addPass(&LiveDebugValuesID); + addPass(&MachineSanitizerBinaryMetadataID); if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && EnableMachineOutliner != RunOutliner::NeverOutline) { @@ -1296,6 +1270,8 @@ void TargetPassConfig::addMachinePasses() { if (!DisableCFIFixup && TM->Options.EnableCFIFixup) addPass(createCFIFixup()); + PM->add(createStackFrameLayoutAnalysisPass()); + // Add passes that directly emit MI after all other MI passes. addPreEmitPass2(); @@ -1519,6 +1495,9 @@ void TargetPassConfig::addOptimizedRegAlloc() { /// Add passes that optimize machine instructions after register allocation. void TargetPassConfig::addMachineLateOptimization() { + // Cleanup of redundant immediate/address loads. + addPass(&MachineLateInstrsCleanupID); + // Branch folding must be run after regalloc and prolog/epilog insertion. addPass(&BranchFolderPassID); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp index ac346585b0f8..a41d5999d961 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -115,7 +115,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI, OS << "$noreg"; else if (Register::isStackSlot(Reg)) OS << "SS#" << Register::stackSlot2Index(Reg); - else if (Register::isVirtualRegister(Reg)) { + else if (Reg.isVirtual()) { StringRef Name = MRI ? MRI->getVRegName(Reg) : ""; if (Name != "") { OS << '%' << Name; @@ -571,10 +571,14 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes( break; } - // Try to cover as much of the remaining lanes as possible but - // as few of the already covered lanes as possible. - int Cover = (SubRegMask & LanesLeft).getNumLanes() - - (SubRegMask & ~LanesLeft).getNumLanes(); + // Do not cover already-covered lanes to avoid creating cycles + // in copy bundles (= bundle contains copies that write to the + // registers). + if ((SubRegMask & ~LanesLeft).any()) + continue; + + // Try to cover as many of the remaining lanes as possible. 
+ const int Cover = (SubRegMask & LanesLeft).getNumLanes(); if (Cover > BestCover) { BestCover = Cover; BestIdx = Idx; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp index ac07c86cab85..dba84950f49d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp @@ -26,6 +26,7 @@ #include <algorithm> #include <cassert> #include <cstdint> +#include <numeric> using namespace llvm; @@ -43,22 +44,6 @@ bool TargetSchedModel::hasInstrItineraries() const { return EnableSchedItins && !InstrItins.isEmpty(); } -static unsigned gcd(unsigned Dividend, unsigned Divisor) { - // Dividend and Divisor will be naturally swapped as needed. - while (Divisor) { - unsigned Rem = Dividend % Divisor; - Dividend = Divisor; - Divisor = Rem; - }; - return Dividend; -} - -static unsigned lcm(unsigned A, unsigned B) { - unsigned LCM = (uint64_t(A) * B) / gcd(A, B); - assert((LCM >= A && LCM >= B) && "LCM overflow"); - return LCM; -} - void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) { STI = TSInfo; SchedModel = TSInfo->getSchedModel(); @@ -71,7 +56,7 @@ void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) { for (unsigned Idx = 0; Idx < NumRes; ++Idx) { unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; if (NumUnits > 0) - ResourceLCM = lcm(ResourceLCM, NumUnits); + ResourceLCM = std::lcm(ResourceLCM, NumUnits); } MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; for (unsigned Idx = 0; Idx < NumRes; ++Idx) { @@ -237,9 +222,9 @@ unsigned TargetSchedModel::computeOperandLatency( // If DefIdx does not exist in the model (e.g. implicit defs), then return // unit latency (defaultDefLatency may be too conservative). #ifndef NDEBUG - if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() - && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() - && SchedModel.isComplete()) { + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() && + !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() && + SchedModel.isComplete()) { errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)"; llvm_unreachable("incomplete machine model"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 17fe819fa900..8cb3667aea28 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1657,13 +1657,50 @@ bool TwoAddressInstructionPass::processStatepoint( if (RegA == RegB) continue; + // CodeGenPrepare can sink pointer compare past statepoint, which + // breaks assumption that statepoint kills tied-use register when + // in SSA form (see note in IR/SafepointIRVerifier.cpp). Fall back + // to generic tied register handling to avoid assertion failures. + // TODO: Recompute LIS/LV information for new range here. 
+ if (LIS) { + const auto &UseLI = LIS->getInterval(RegB); + const auto &DefLI = LIS->getInterval(RegA); + if (DefLI.overlaps(UseLI)) { + LLVM_DEBUG(dbgs() << "LIS: " << printReg(RegB, TRI, 0) + << " UseLI overlaps with DefLI\n"); + NeedCopy = true; + continue; + } + } else if (LV && LV->getVarInfo(RegB).findKill(MI->getParent()) != MI) { + // Note that MachineOperand::isKill does not work here, because it + // is set only on first register use in instruction and for statepoint + // tied-use register will usually be found in preceeding deopt bundle. + LLVM_DEBUG(dbgs() << "LV: " << printReg(RegB, TRI, 0) + << " not killed by statepoint\n"); + NeedCopy = true; + continue; + } + + if (!MRI->constrainRegClass(RegB, MRI->getRegClass(RegA))) { + LLVM_DEBUG(dbgs() << "MRI: couldn't constrain" << printReg(RegB, TRI, 0) + << " to register class of " << printReg(RegA, TRI, 0) + << '\n'); + NeedCopy = true; + continue; + } MRI->replaceRegWith(RegA, RegB); if (LIS) { VNInfo::Allocator &A = LIS->getVNInfoAllocator(); LiveInterval &LI = LIS->getInterval(RegB); - for (auto &S : LIS->getInterval(RegA)) { - VNInfo *VNI = LI.getNextValue(S.start, A); + LiveInterval &Other = LIS->getInterval(RegA); + SmallVector<VNInfo *> NewVNIs; + for (const VNInfo *VNI : Other.valnos) { + assert(VNI->id == NewVNIs.size() && "assumed"); + NewVNIs.push_back(LI.createValueCopy(VNI, A)); + } + for (auto &S : Other) { + VNInfo *VNI = NewVNIs[S.valno->id]; LiveRange::Segment NewSeg(S.start, S.end, VNI); LI.addSegment(NewSeg); } @@ -1676,6 +1713,7 @@ bool TwoAddressInstructionPass::processStatepoint( LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB); LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA); SrcInfo.AliveBlocks |= DstInfo.AliveBlocks; + DstInfo.AliveBlocks.clear(); for (auto *KillMI : DstInfo.Kills) LV->addVirtualRegisterKilled(RegB, *KillMI, false); } @@ -1857,11 +1895,6 @@ void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(0).getSubReg() || DstReg.isPhysical() || - !(MI.getNumOperands() & 1)) { - LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); - llvm_unreachable(nullptr); - } SmallVector<Register, 4> OrigRegs; if (LIS) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index 36e3c1245f1c..e6c0b3242d67 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -15,8 +15,10 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TypePromotion.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -106,9 +108,9 @@ class IRPromoter { SetVector<Value *> &Sources; SetVector<Instruction *> &Sinks; SmallPtrSetImpl<Instruction *> &SafeWrap; + SmallPtrSetImpl<Instruction *> &InstsToRemove; IntegerType *ExtTy = nullptr; SmallPtrSet<Value *, 8> NewInsts; - SmallPtrSet<Instruction *, 4> InstsToRemove; DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap; SmallPtrSet<Value *, 8> Promoted; @@ -120,25 +122,26 @@ class IRPromoter { void Cleanup(); public: - IRPromoter(LLVMContext &C, unsigned Width, - SetVector<Value *> &visited, SetVector<Value *> &sources, - SetVector<Instruction *> &sinks, - 
SmallPtrSetImpl<Instruction *> &wrap) - : Ctx(C), PromotedWidth(Width), Visited(visited), - Sources(sources), Sinks(sinks), SafeWrap(wrap) { + IRPromoter(LLVMContext &C, unsigned Width, SetVector<Value *> &visited, + SetVector<Value *> &sources, SetVector<Instruction *> &sinks, + SmallPtrSetImpl<Instruction *> &wrap, + SmallPtrSetImpl<Instruction *> &instsToRemove) + : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources), + Sinks(sinks), SafeWrap(wrap), InstsToRemove(instsToRemove) { ExtTy = IntegerType::get(Ctx, PromotedWidth); } void Mutate(); }; -class TypePromotion : public FunctionPass { +class TypePromotionImpl { unsigned TypeSize = 0; LLVMContext *Ctx = nullptr; unsigned RegisterBitWidth = 0; SmallPtrSet<Value *, 16> AllVisited; SmallPtrSet<Instruction *, 8> SafeToPromote; SmallPtrSet<Instruction *, 4> SafeWrap; + SmallPtrSet<Instruction *, 4> InstsToRemove; // Does V have the same size result type as TypeSize. bool EqualTypeSize(Value *V); @@ -166,17 +169,25 @@ class TypePromotion : public FunctionPass { // Is V an instruction thats result can trivially promoted, or has safe // wrapping. bool isLegalToPromote(Value *V); - bool TryToPromote(Value *V, unsigned PromotedWidth); + bool TryToPromote(Value *V, unsigned PromotedWidth, const LoopInfo &LI); + +public: + bool run(Function &F, const TargetMachine *TM, + const TargetTransformInfo &TTI, const LoopInfo &LI); +}; +class TypePromotionLegacy : public FunctionPass { public: static char ID; - TypePromotion() : FunctionPass(ID) {} + TypePromotionLegacy() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); AU.setPreservesCFG(); + AU.addPreserved<LoopInfoWrapperPass>(); } StringRef getPassName() const override { return PASS_NAME; } @@ -192,19 +203,19 @@ static bool GenerateSignBits(Instruction *I) { Opc == Instruction::SRem || Opc == Instruction::SExt; } -bool TypePromotion::EqualTypeSize(Value *V) { +bool TypePromotionImpl::EqualTypeSize(Value *V) { return V->getType()->getScalarSizeInBits() == TypeSize; } -bool TypePromotion::LessOrEqualTypeSize(Value *V) { +bool TypePromotionImpl::LessOrEqualTypeSize(Value *V) { return V->getType()->getScalarSizeInBits() <= TypeSize; } -bool TypePromotion::GreaterThanTypeSize(Value *V) { +bool TypePromotionImpl::GreaterThanTypeSize(Value *V) { return V->getType()->getScalarSizeInBits() > TypeSize; } -bool TypePromotion::LessThanTypeSize(Value *V) { +bool TypePromotionImpl::LessThanTypeSize(Value *V) { return V->getType()->getScalarSizeInBits() < TypeSize; } @@ -215,7 +226,7 @@ bool TypePromotion::LessThanTypeSize(Value *V) { /// return values because we only accept ones that guarantee a zeroext ret val. /// Many arguments will have the zeroext attribute too, so those would be free /// too. -bool TypePromotion::isSource(Value *V) { +bool TypePromotionImpl::isSource(Value *V) { if (!isa<IntegerType>(V->getType())) return false; @@ -236,7 +247,7 @@ bool TypePromotion::isSource(Value *V) { /// Return true if V will require any promoted values to be truncated for the /// the IR to remain valid. We can't mutate the value type of these /// instructions. -bool TypePromotion::isSink(Value *V) { +bool TypePromotionImpl::isSink(Value *V) { // TODO The truncate also isn't actually necessary because we would already // proved that the data value is kept within the range of the original data // type. 
We currently remove any truncs inserted for handling zext sinks. @@ -262,7 +273,7 @@ bool TypePromotion::isSink(Value *V) { } /// Return whether this instruction can safely wrap. -bool TypePromotion::isSafeWrap(Instruction *I) { +bool TypePromotionImpl::isSafeWrap(Instruction *I) { // We can support a potentially wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. @@ -368,7 +379,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) { return false; } -bool TypePromotion::shouldPromote(Value *V) { +bool TypePromotionImpl::shouldPromote(Value *V) { if (!isa<IntegerType>(V->getType()) || isSink(V)) return false; @@ -551,8 +562,13 @@ void IRPromoter::TruncateSinks() { } // Don't insert a trunc for a zext which can still legally promote. + // Nor insert a trunc when the input value to that trunc has the same width + // as the zext we are inserting it for. When this happens the input operand + // for the zext will be promoted to the same width as the zext's return type + // rendering that zext unnecessary. This zext gets removed before the end + // of the pass. if (auto ZExt = dyn_cast<ZExtInst>(I)) - if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth) + if (ZExt->getType()->getScalarSizeInBits() >= PromotedWidth) continue; // Now handle the others. @@ -599,7 +615,6 @@ void IRPromoter::Cleanup() { for (auto *I : InstsToRemove) { LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n"); I->dropAllReferences(); - I->eraseFromParent(); } } @@ -675,7 +690,7 @@ void IRPromoter::Mutate() { /// We disallow booleans to make life easier when dealing with icmps but allow /// any other integer that fits in a scalar register. Void types are accepted /// so we can handle switches. -bool TypePromotion::isSupportedType(Value *V) { +bool TypePromotionImpl::isSupportedType(Value *V) { Type *Ty = V->getType(); // Allow voids and pointers, these won't be promoted. @@ -693,7 +708,7 @@ bool TypePromotion::isSupportedType(Value *V) { /// Disallow casts other than zext and truncs and only allow calls if their /// return value is zeroext. We don't allow opcodes that can introduce sign /// bits. -bool TypePromotion::isSupportedValue(Value *V) { +bool TypePromotionImpl::isSupportedValue(Value *V) { if (auto *I = dyn_cast<Instruction>(V)) { switch (I->getOpcode()) { default: @@ -741,7 +756,7 @@ bool TypePromotion::isSupportedValue(Value *V) { /// Check that the type of V would be promoted and that the original type is /// smaller than the targeted promoted type. Check that we're not trying to /// promote something larger than our base 'TypeSize' type. 
-bool TypePromotion::isLegalToPromote(Value *V) { +bool TypePromotionImpl::isLegalToPromote(Value *V) { auto *I = dyn_cast<Instruction>(V); if (!I) return true; @@ -756,9 +771,10 @@ bool TypePromotion::isLegalToPromote(Value *V) { return false; } -bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { +bool TypePromotionImpl::TryToPromote(Value *V, unsigned PromotedWidth, + const LoopInfo &LI) { Type *OrigTy = V->getType(); - TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize(); + TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedValue(); SafeToPromote.clear(); SafeWrap.clear(); @@ -850,95 +866,134 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { unsigned ToPromote = 0; unsigned NonFreeArgs = 0; + unsigned NonLoopSources = 0, LoopSinks = 0; SmallPtrSet<BasicBlock *, 4> Blocks; - for (auto *V : CurrentVisited) { - if (auto *I = dyn_cast<Instruction>(V)) + for (auto *CV : CurrentVisited) { + if (auto *I = dyn_cast<Instruction>(CV)) Blocks.insert(I->getParent()); - if (Sources.count(V)) { - if (auto *Arg = dyn_cast<Argument>(V)) + if (Sources.count(CV)) { + if (auto *Arg = dyn_cast<Argument>(CV)) if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr()) ++NonFreeArgs; + if (!isa<Instruction>(CV) || + !LI.getLoopFor(cast<Instruction>(CV)->getParent())) + ++NonLoopSources; continue; } - if (Sinks.count(cast<Instruction>(V))) + if (isa<PHINode>(CV)) + continue; + if (LI.getLoopFor(cast<Instruction>(CV)->getParent())) + ++LoopSinks; + if (Sinks.count(cast<Instruction>(CV))) continue; ++ToPromote; } // DAG optimizations should be able to handle these cases better, especially // for function arguments. - if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) + if (!isa<PHINode>(V) && !(LoopSinks && NonLoopSources) && + (ToPromote < 2 || (Blocks.size() == 1 && NonFreeArgs > SafeWrap.size()))) return false; IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks, - SafeWrap); + SafeWrap, InstsToRemove); Promoter.Mutate(); return true; } -bool TypePromotion::runOnFunction(Function &F) { - if (skipFunction(F) || DisablePromotion) +bool TypePromotionImpl::run(Function &F, const TargetMachine *TM, + const TargetTransformInfo &TTI, + const LoopInfo &LI) { + if (DisablePromotion) return false; LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n"); - auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); - if (!TPC) - return false; - AllVisited.clear(); SafeToPromote.clear(); SafeWrap.clear(); bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); - const TargetMachine &TM = TPC->getTM<TargetMachine>(); - const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); + const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F); const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); - const TargetTransformInfo &TII = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); RegisterBitWidth = - TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize(); + TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue(); Ctx = &F.getParent()->getContext(); - // Search up from icmps to try to promote their operands. + // Return the preferred integer width of the instruction, or zero if we + // shouldn't try. 
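As an aside, the getFixedSize() to getFixedValue() changes in this hunk follow the TypeSize API, where the fixed (non-scalable) quantity is now read with getFixedValue(). A minimal sketch of the pattern, with an illustrative helper name:

    #include "llvm/IR/Type.h"
    #include <cstdint>

    // Illustrative helper (not from the patch): read a type's primitive size
    // in bits; getFixedValue() asserts the size is not scalable.
    static uint64_t fixedBitWidth(llvm::Type *Ty) {
      return Ty->getPrimitiveSizeInBits().getFixedValue();
    }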
+ auto GetPromoteWidth = [&](Instruction *I) -> uint32_t { + if (!isa<IntegerType>(I->getType())) + return 0; + + EVT SrcVT = TLI->getValueType(DL, I->getType()); + if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) + return 0; + + if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger) + return 0; + + EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); + if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { + LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " + << "for promoted type\n"); + return 0; + } + + // TODO: Should we prefer to use RegisterBitWidth instead? + return PromotedVT.getFixedSizeInBits(); + }; + + auto BBIsInLoop = [&](BasicBlock *BB) -> bool { + for (auto *L : LI) + if (L->contains(BB)) + return true; + return false; + }; + for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (AllVisited.count(&I)) continue; - if (!isa<ICmpInst>(&I)) - continue; - - auto *ICmp = cast<ICmpInst>(&I); - // Skip signed or pointer compares - if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType())) - continue; - - LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); - - for (auto &Op : ICmp->operands()) { - if (auto *I = dyn_cast<Instruction>(Op)) { - EVT SrcVT = TLI->getValueType(DL, I->getType()); - if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) - break; - - if (TLI->getTypeAction(*Ctx, SrcVT) != - TargetLowering::TypePromoteInteger) - break; - EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); - if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { - LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " - << "for promoted type\n"); - break; + if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) && + isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) { + LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << I.getOperand(0) + << "\n"); + EVT ZExtVT = TLI->getValueType(DL, I.getType()); + Instruction *Phi = static_cast<Instruction *>(I.getOperand(0)); + auto PromoteWidth = ZExtVT.getFixedSizeInBits(); + if (RegisterBitWidth < PromoteWidth) { + LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target " + << "register for ZExt type\n"); + continue; + } + MadeChange |= TryToPromote(Phi, PromoteWidth, LI); + } else if (auto *ICmp = dyn_cast<ICmpInst>(&I)) { + // Search up from icmps to try to promote their operands. 
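The BBIsInLoop lambda above walks the top-level loops and relies on Loop::contains() also covering nested blocks; an equivalent, more direct formulation (shown only as a sketch, with an illustrative helper name) asks LoopInfo for the innermost containing loop:

    #include "llvm/Analysis/LoopInfo.h"

    // Illustrative alternative (not from the patch): a block sits inside some
    // loop exactly when getLoopFor() returns a non-null innermost loop.
    static bool blockIsInLoop(const llvm::LoopInfo &LI,
                              const llvm::BasicBlock *BB) {
      return LI.getLoopFor(BB) != nullptr;
    }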
+ // Skip signed or pointer compares + if (ICmp->isSigned()) + continue; + + LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); + + for (auto &Op : ICmp->operands()) { + if (auto *OpI = dyn_cast<Instruction>(Op)) { + if (auto PromotedWidth = GetPromoteWidth(OpI)) { + MadeChange |= TryToPromote(OpI, PromotedWidth, LI); + break; + } } - - MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits()); - break; } } } + if (!InstsToRemove.empty()) { + for (auto *I : InstsToRemove) + I->eraseFromParent(); + InstsToRemove.clear(); + } } AllVisited.clear(); @@ -948,9 +1003,46 @@ bool TypePromotion::runOnFunction(Function &F) { return MadeChange; } -INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) -INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_BEGIN(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false) + +char TypePromotionLegacy::ID = 0; -char TypePromotion::ID = 0; +bool TypePromotionLegacy::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; -FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); } + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + auto *TM = &TPC->getTM<TargetMachine>(); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + + TypePromotionImpl TP; + return TP.run(F, TM, TTI, LI); +} + +FunctionPass *llvm::createTypePromotionLegacyPass() { + return new TypePromotionLegacy(); +} + +PreservedAnalyses TypePromotionPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + TypePromotionImpl TP; + + bool Changed = TP.run(F, TM, TTI, LI); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<LoopAnalysis>(); + return PA; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 8225d4ea6996..88460971338c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -582,7 +582,7 @@ int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { for (const auto &P : PD) { if (!P.isValid()) continue; - // The pressure differences are computed bottom-up, so the comparision for + // The pressure differences are computed bottom-up, so the comparison for // an increase is positive in the bottom direction, but negative in the // top-down direction. 
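For context on the new-pass-manager entry point added above, a hedged sketch of how TypePromotionPass might be scheduled in a FunctionPassManager; the TargetMachine-pointer constructor is an assumption inferred from the TM member used in TypePromotionPass::run(), and the helper name is illustrative:

    #include "llvm/CodeGen/TypePromotion.h"
    #include "llvm/IR/PassManager.h"

    // Illustrative wiring (not from the patch): schedule the pass in a
    // new-pass-manager function pipeline.
    static llvm::FunctionPassManager
    buildPipeline(const llvm::TargetMachine *TM) {
      llvm::FunctionPassManager FPM;
      FPM.addPass(llvm::TypePromotionPass(TM)); // assumed constructor shape
      return FPM;
    }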
if (HighPressureSets[P.getPSet()]) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp index f577aff39ea7..608434800bc3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp @@ -232,10 +232,16 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1024); + case MVT::v2048i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 2048); case MVT::v128i2: return FixedVectorType::get(Type::getIntNTy(Context, 2), 128); + case MVT::v256i2: + return FixedVectorType::get(Type::getIntNTy(Context, 2), 256); case MVT::v64i4: return FixedVectorType::get(Type::getIntNTy(Context, 4), 64); + case MVT::v128i4: + return FixedVectorType::get(Type::getIntNTy(Context, 4), 128); case MVT::v1i8: return FixedVectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: @@ -296,6 +302,14 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getInt32Ty(Context), 7); case MVT::v8i32: return FixedVectorType::get(Type::getInt32Ty(Context), 8); + case MVT::v9i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 9); + case MVT::v10i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 10); + case MVT::v11i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 11); + case MVT::v12i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 12); case MVT::v16i32: return FixedVectorType::get(Type::getInt32Ty(Context), 16); case MVT::v32i32: @@ -388,6 +402,14 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getFloatTy(Context), 7); case MVT::v8f32: return FixedVectorType::get(Type::getFloatTy(Context), 8); + case MVT::v9f32: + return FixedVectorType::get(Type::getFloatTy(Context), 9); + case MVT::v10f32: + return FixedVectorType::get(Type::getFloatTy(Context), 10); + case MVT::v11f32: + return FixedVectorType::get(Type::getFloatTy(Context), 11); + case MVT::v12f32: + return FixedVectorType::get(Type::getFloatTy(Context), 12); case MVT::v16f32: return FixedVectorType::get(Type::getFloatTy(Context), 16); case MVT::v32f32: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp index 069aca742da0..f80b06d7e9b7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp @@ -145,7 +145,7 @@ void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) { void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << "********** REGISTER MAP **********\n"; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { OS << '[' << printReg(Reg, TRI) << " -> " << printReg(Virt2PhysMap[Reg], TRI) << "] " @@ -154,7 +154,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { } for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; @@ -475,7 +475,7 @@ void 
VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { // clobbering. for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) { for (int I = E; I--; ) - if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) { + if (!anyRegsAlias(MIs[I], ArrayRef(MIs).take_front(E), TRI)) { if (I + 1 != E) std::swap(MIs[I], MIs[E - 1]); --E; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp index aa6645227edb..361f185243b1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -182,8 +182,7 @@ bool WasmEHPrepare::prepareThrows(Function &F) { Changed = true; auto *BB = ThrowI->getParent(); SmallVector<BasicBlock *, 4> Succs(successors(BB)); - auto &InstList = BB->getInstList(); - InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end()); + BB->erase(std::next(BasicBlock::iterator(ThrowI)), BB->end()); IRB.SetInsertPoint(BB); IRB.CreateUnreachable(); eraseDeadBBsAndChildren(Succs); @@ -253,7 +252,7 @@ bool WasmEHPrepare::prepareEHPads(Function &F) { auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI()); // In case of a single catch (...), we don't need to emit a personalify // function call - if (CPI->getNumArgOperands() == 1 && + if (CPI->arg_size() == 1 && cast<Constant>(CPI->getArgOperand(0))->isNullValue()) prepareEHPad(BB, false); else diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp index b835503ee9ed..dfca2be0a114 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -556,8 +556,8 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, // Create the entry for this cleanup with the appropriate handler // properties. Finally and fault handlers are distinguished by arity. ClrHandlerType HandlerType = - (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault - : ClrHandlerType::Finally); + (Cleanup->arg_size() ? ClrHandlerType::Fault + : ClrHandlerType::Finally); int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1, HandlerType, 0, Pad->getParent()); // Queue any child EH pads on the worklist. 
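The arg_size() calls introduced in the two hunks above are the current spelling of the old getNumArgOperands() on funclet pads; as a sketch (illustrative helper name only), the Fault/Finally arity check reads:

    #include "llvm/IR/Instructions.h"

    // Illustrative helper (not from the patch): a CLR cleanup funclet with an
    // argument is treated as a fault handler, one without as a finally handler.
    static bool isFaultHandler(const llvm::CleanupPadInst *Cleanup) {
      return Cleanup->arg_size() != 0;
    }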
@@ -733,7 +733,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F, for (auto *PN : PHINodes) { // There may be lingering uses on other EH PHIs being removed - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); PN->eraseFromParent(); } } @@ -847,10 +847,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { ColorVector &IncomingColors = BlockColors[IncomingBlock]; assert(!IncomingColors.empty() && "Block not colored!"); assert((IncomingColors.size() == 1 || - llvm::all_of(IncomingColors, - [&](BasicBlock *Color) { - return Color != FuncletPadBB; - })) && + !llvm::is_contained(IncomingColors, FuncletPadBB)) && "Cloning should leave this funclet's blocks monochromatic"); EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB); } @@ -1215,8 +1212,8 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator()); Goto->removeFromParent(); CatchRet->removeFromParent(); - IncomingBlock->getInstList().push_back(CatchRet); - NewBlock->getInstList().push_back(Goto); + CatchRet->insertInto(IncomingBlock, IncomingBlock->end()); + Goto->insertInto(NewBlock, NewBlock->end()); Goto->setSuccessor(0, PHIBlock); CatchRet->setSuccessor(NewBlock); // Update the color mapping for the newly split edge. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp index b66429d8a5bf..13f45ae048bb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -151,19 +151,18 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { InstrAttr.getValueAsString() == "xray-never"; if (NeverInstrument && !AlwaysInstrument) return false; - auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold"); auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops"); - unsigned int XRayThreshold = 0; - if (!AlwaysInstrument) { - if (!ThresholdAttr.isStringAttribute()) - return false; // XRay threshold attribute not found. - if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold)) - return false; // Invalid value for threshold. + uint64_t XRayThreshold = 0; + if (!AlwaysInstrument) { bool IgnoreLoops = IgnoreLoopsAttr.isValid(); + XRayThreshold = F.getFnAttributeAsParsedInteger( + "xray-instruction-threshold", std::numeric_limits<uint64_t>::max()); + if (XRayThreshold == std::numeric_limits<uint64_t>::max()) + return false; // Count the number of MachineInstr`s in MachineFunction - int64_t MICount = 0; + uint64_t MICount = 0; for (const auto &MBB : MF) MICount += MBB.size(); |
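Finally, the XRay change above replaces hand-rolled attribute parsing with Function::getFnAttributeAsParsedInteger(). A brief usage sketch (the helper name is illustrative; when the attribute is absent the supplied default is returned):

    #include "llvm/IR/Function.h"
    #include <cstdint>
    #include <limits>

    // Illustrative helper (not from the patch): read the string-valued
    // "xray-instruction-threshold" attribute as an integer, using the maximum
    // value as the "do not instrument" sentinel, as the pass does above.
    static uint64_t xrayThreshold(const llvm::Function &F) {
      return F.getFnAttributeAsParsedInteger(
          "xray-instruction-threshold", std::numeric_limits<uint64_t>::max());
    }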