Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
86 files changed, 3494 insertions, 1632 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5c64622c7245..bb71d72256d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -120,8 +120,7 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) {
 AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
     MachineFunction &MFi, const RegisterClassInfo &RCI,
     TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
-    : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
-      TII(MF.getSubtarget().getInstrInfo()),
+    : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()),
       TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
   /* Collect a bitset of all registers that are only broken if they
      are on the critical path. */
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 7e68e5e22879..e8fef505e43d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -577,9 +577,9 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
   bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
   ADS = true;
 
-  AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
-  AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
-                          AttributeList::ReturnIndex);
+  AttrBuilder CallerAttrs(F->getContext(), F->getAttributes().getRetAttrs());
+  AttrBuilder CalleeAttrs(F->getContext(),
+                          cast<CallInst>(I)->getAttributes().getRetAttrs());
 
   // Following attributes are completely benign as far as calling convention
   // goes, they shouldn't affect whether the call is a tail call.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 964cef75d164..03e63321e3c4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -23,6 +23,8 @@ namespace llvm {
 
 AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
 
+void AIXException::markFunctionEnd() { endFragment(); }
+
 void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
                                           const MCSymbol *PerSym) {
   // Generate EH Info Table.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 533f20535655..4f3f798fe6f8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -247,6 +247,11 @@ void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
   if (DD) {
     assert(OutStreamer->hasRawTextSupport() &&
            "Expected assembly output mode.");
+    // This is NVPTX specific and it's unclear why.
+    // PR51079: If we have code without debug information we need to give up.
+    DISubprogram *MFSP = MF.getFunction().getSubprogram();
+    if (!MFSP)
+      return;
     (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
   }
 }
@@ -2477,7 +2482,8 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 // two boundary. If a global value is specified, and if that global has
 // an explicit alignment requested, it will override the alignment request
 // if required for correctness.
-void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
+void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV,
+                               unsigned MaxBytesToEmit) const {
   if (GV)
     Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
 
@@ -2490,9 +2496,9 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
       STI = &getSubtargetInfo();
     else
       STI = TM.getMCSubtargetInfo();
-    OutStreamer->emitCodeAlignment(Alignment.value(), STI);
+    OutStreamer->emitCodeAlignment(Alignment.value(), STI, MaxBytesToEmit);
   } else
-    OutStreamer->emitValueToAlignment(Alignment.value());
+    OutStreamer->emitValueToAlignment(Alignment.value(), 0, 1, MaxBytesToEmit);
 }
 
 //===----------------------------------------------------------------------===//
@@ -3286,7 +3292,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
   // Emit an alignment directive for this block, if needed.
   const Align Alignment = MBB.getAlignment();
   if (Alignment != Align(1))
-    emitAlignment(Alignment);
+    emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment());
 
   // Switch to a new section if this basic block must begin a section. The
   // entry block is always placed in the function section and is handled
@@ -3648,6 +3654,12 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
       OutStreamer->getContext().getDwarfFormat());
 }
 
+dwarf::FormParams AsmPrinter::getDwarfFormParams() const {
+  return {getDwarfVersion(), uint8_t(getPointerSize()),
+          OutStreamer->getContext().getDwarfFormat(),
+          MAI->doesDwarfUseRelocationsAcrossSections()};
+}
+
 unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
   return dwarf::getUnitLengthFieldByteSize(
       OutStreamer->getContext().getDwarfFormat());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 5e7db1f2f76c..bd2c60eadd61 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -33,6 +33,7 @@ class ByteStreamer {
   virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
   virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
                            unsigned PadTo = 0) = 0;
+  virtual unsigned emitDIERef(const DIE &D) = 0;
 };
 
 class APByteStreamer final : public ByteStreamer {
@@ -54,15 +55,24 @@ public:
     AP.OutStreamer->AddComment(Comment);
     AP.emitULEB128(DWord, nullptr, PadTo);
   }
+  unsigned emitDIERef(const DIE &D) override {
+    uint64_t Offset = D.getOffset();
+    static constexpr unsigned ULEB128PadSize = 4;
+    assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+    emitULEB128(Offset, "", ULEB128PadSize);
+    // Return how many comments to skip in DwarfDebug::emitDebugLocEntry to keep
+    // comments aligned with debug loc entries.
+ return ULEB128PadSize; + } }; class HashingByteStreamer final : public ByteStreamer { private: DIEHash &Hash; public: - HashingByteStreamer(DIEHash &H) : Hash(H) {} - void emitInt8(uint8_t Byte, const Twine &Comment) override { - Hash.update(Byte); + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void emitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); } void emitSLEB128(uint64_t DWord, const Twine &Comment) override { Hash.addSLEB128(DWord); @@ -71,6 +81,10 @@ class HashingByteStreamer final : public ByteStreamer { unsigned PadTo) override { Hash.addULEB128(DWord); } + unsigned emitDIERef(const DIE &D) override { + Hash.hashRawTypeReference(D); + return 0; // Only used together with the APByteStreamer. + } }; class BufferByteStreamer final : public ByteStreamer { @@ -115,9 +129,15 @@ public: // with each other. for (size_t i = 1; i < Length; ++i) Comments.push_back(""); - } } + unsigned emitDIERef(const DIE &D) override { + uint64_t Offset = D.getOffset(); + static constexpr unsigned ULEB128PadSize = 4; + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + emitULEB128(Offset, "", ULEB128PadSize); + return 0; // Only used together with the APByteStreamer. + } }; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index d621108408f0..52c74713551c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -68,6 +68,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -600,6 +601,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { return SourceLanguage::D; case dwarf::DW_LANG_Swift: return SourceLanguage::Swift; + case dwarf::DW_LANG_Rust: + return SourceLanguage::Rust; default: // There's no CodeView representation for this language, and CV doesn't // have an "unknown" option for the language field, so we'll use MASM, @@ -843,6 +846,12 @@ void CodeViewDebug::emitCompilerInformation() { if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) { Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO); } + using ArchType = llvm::Triple::ArchType; + ArchType Arch = Triple(MMI->getModule()->getTargetTriple()).getArch(); + if (Asm->TM.Options.Hotpatch || Arch == ArchType::thumb || + Arch == ArchType::aarch64) { + Flags |= static_cast<uint32_t>(CompileSym3Flags::HotPatch); + } OS.AddComment("Flags and language"); OS.emitInt32(Flags); @@ -857,8 +866,10 @@ void CodeViewDebug::emitCompilerInformation() { StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); - for (int N : FrontVer.Part) + for (int N : FrontVer.Part) { + N = std::min<int>(N, std::numeric_limits<uint16_t>::max()); OS.emitInt16(N); + } // Some Microsoft tools, like Binscope, expect a backend version number of at // least 8.something, so we'll coerce the LLVM version into a form that @@ -885,6 +896,34 @@ static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable, return TypeTable.writeLeafType(SIR); } +static std::string flattenCommandLine(ArrayRef<std::string> Args, + StringRef MainFilename) { + std::string FlatCmdLine; + raw_string_ostream OS(FlatCmdLine); + bool 
PrintedOneArg = false; + if (!StringRef(Args[0]).contains("-cc1")) { + llvm::sys::printArg(OS, "-cc1", /*Quote=*/true); + PrintedOneArg = true; + } + for (unsigned i = 0; i < Args.size(); i++) { + StringRef Arg = Args[i]; + if (Arg.empty()) + continue; + if (Arg == "-main-file-name" || Arg == "-o") { + i++; // Skip this argument and next one. + continue; + } + if (Arg.startswith("-object-file-name") || Arg == MainFilename) + continue; + if (PrintedOneArg) + OS << " "; + llvm::sys::printArg(OS, Arg, /*Quote=*/true); + PrintedOneArg = true; + } + OS.flush(); + return FlatCmdLine; +} + void CodeViewDebug::emitBuildInfo() { // First, make LF_BUILDINFO. It's a sequence of strings with various bits of // build info. The known prefix is: @@ -905,8 +944,16 @@ void CodeViewDebug::emitBuildInfo() { getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory()); BuildInfoArgs[BuildInfoRecord::SourceFile] = getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename()); - // FIXME: Path to compiler and command line. PDB is intentionally blank unless - // we implement /Zi type servers. + // FIXME: PDB is intentionally blank unless we implement /Zi type servers. + BuildInfoArgs[BuildInfoRecord::TypeServerPDB] = + getStringIdTypeIdx(TypeTable, ""); + if (Asm->TM.Options.MCOptions.Argv0 != nullptr) { + BuildInfoArgs[BuildInfoRecord::BuildTool] = + getStringIdTypeIdx(TypeTable, Asm->TM.Options.MCOptions.Argv0); + BuildInfoArgs[BuildInfoRecord::CommandLine] = getStringIdTypeIdx( + TypeTable, flattenCommandLine(Asm->TM.Options.MCOptions.CommandLineArgs, + MainSourceFile->getFilename())); + } BuildInfoRecord BIR(BuildInfoArgs); TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 2834d9c3ebbf..1a0256f30d41 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -274,7 +274,7 @@ LLVM_DUMP_METHOD void DIE::dump() const { } #endif -unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, +unsigned DIE::computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams, DIEAbbrevSet &AbbrevSet, unsigned CUOffset) { // Unique the abbreviation and fill in the abbreviation number so this DIE @@ -289,7 +289,7 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, // Add the byte size of all the DIE attribute values. for (const auto &V : values()) - CUOffset += V.SizeOf(AP); + CUOffset += V.sizeOf(FormParams); // Let the children compute their offsets and abbreviation numbers. if (hasChildren()) { @@ -297,7 +297,8 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, assert(Abbrev.hasChildren() && "Children flag not set"); for (auto &Child : children()) - CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset); + CUOffset = + Child.computeOffsetsAndAbbrevs(FormParams, AbbrevSet, CUOffset); // Each child chain is terminated with a zero byte, adjust the offset. 
CUOffset += sizeof(int8_t); @@ -335,13 +336,13 @@ void DIEValue::emitValue(const AsmPrinter *AP) const { } } -unsigned DIEValue::SizeOf(const AsmPrinter *AP) const { +unsigned DIEValue::sizeOf(const dwarf::FormParams &FormParams) const { switch (Ty) { case isNone: llvm_unreachable("Expected valid DIEValue"); #define HANDLE_DIEVALUE(T) \ case is##T: \ - return getDIE##T().SizeOf(AP, Form); + return getDIE##T().sizeOf(FormParams, Form); #include "llvm/CodeGen/DIEValue.def" } llvm_unreachable("Unknown DIE kind"); @@ -407,7 +408,8 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_strp_sup: case dwarf::DW_FORM_addr: case dwarf::DW_FORM_ref_addr: - Asm->OutStreamer->emitIntValue(Integer, SizeOf(Asm, Form)); + Asm->OutStreamer->emitIntValue(Integer, + sizeOf(Asm->getDwarfFormParams(), Form)); return; case dwarf::DW_FORM_GNU_str_index: case dwarf::DW_FORM_GNU_addr_index: @@ -425,15 +427,12 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { } } -/// SizeOf - Determine size of integer value in bytes. +/// sizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - assert(AP && "AsmPrinter is required to set FormParams"); - dwarf::FormParams Params = {AP->getDwarfVersion(), - uint8_t(AP->getPointerSize()), - AP->OutStreamer->getContext().getDwarfFormat()}; - - if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params)) +unsigned DIEInteger::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { + if (Optional<uint8_t> FixedSize = + dwarf::getFixedFormByteSize(Form, FormParams)) return *FixedSize; switch (Form) { @@ -464,19 +463,20 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->emitDebugValue(Expr, SizeOf(AP, Form)); + AP->emitDebugValue(Expr, sizeOf(AP->getDwarfFormParams(), Form)); } /// SizeOf - Determine size of expression value in bytes. /// -unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEExpr::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_data4: return 4; case dwarf::DW_FORM_data8: return 8; case dwarf::DW_FORM_sec_offset: - return AP->getDwarfOffsetByteSize(); + return FormParams.getDwarfOffsetByteSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -493,12 +493,14 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } /// void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { bool IsSectionRelative = Form != dwarf::DW_FORM_addr; - AP->emitLabelReference(Label, SizeOf(AP, Form), IsSectionRelative); + AP->emitLabelReference(Label, sizeOf(AP->getDwarfFormParams(), Form), + IsSectionRelative); } -/// SizeOf - Determine size of label value in bytes. +/// sizeOf - Determine size of label value in bytes. 
/// -unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELabel::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_data4: return 4; @@ -506,9 +508,9 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return 8; case dwarf::DW_FORM_sec_offset: case dwarf::DW_FORM_strp: - return AP->getDwarfOffsetByteSize(); + return FormParams.getDwarfOffsetByteSize(); case dwarf::DW_FORM_addr: - return AP->MAI->getCodePointerSize(); + return FormParams.AddrSize; default: llvm_unreachable("DIE Value form not supported yet"); } @@ -527,7 +529,7 @@ void DIEBaseTypeRef::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->emitULEB128(Offset, nullptr, ULEB128PadSize); } -unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEBaseTypeRef::sizeOf(const dwarf::FormParams &, dwarf::Form) const { return ULEB128PadSize; } @@ -541,19 +543,21 @@ void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index /// EmitValue - Emit delta value. /// void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->emitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); + AP->emitLabelDifference(LabelHi, LabelLo, + sizeOf(AP->getDwarfFormParams(), Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEDelta::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_data4: return 4; case dwarf::DW_FORM_data8: return 8; case dwarf::DW_FORM_sec_offset: - return AP->getDwarfOffsetByteSize(); + return FormParams.getDwarfOffsetByteSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -592,9 +596,10 @@ void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { } } -/// SizeOf - Determine size of delta value in bytes. +/// sizeOf - Determine size of delta value in bytes. /// -unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEString::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { // Index of string in symbol table. switch (Form) { case dwarf::DW_FORM_GNU_str_index: @@ -603,11 +608,11 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_strx2: case dwarf::DW_FORM_strx3: case dwarf::DW_FORM_strx4: - return DIEInteger(S.getIndex()).SizeOf(AP, Form); + return DIEInteger(S.getIndex()).sizeOf(FormParams, Form); case dwarf::DW_FORM_strp: - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) - return DIELabel(S.getSymbol()).SizeOf(AP, Form); - return DIEInteger(S.getOffset()).SizeOf(AP, Form); + if (FormParams.DwarfUsesRelocationsAcrossSections) + return DIELabel(S.getSymbol()).sizeOf(FormParams, Form); + return DIEInteger(S.getOffset()).sizeOf(FormParams, Form); default: llvm_unreachable("Expected valid string form"); } @@ -630,7 +635,7 @@ void DIEInlineString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { llvm_unreachable("Expected valid string form"); } -unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEInlineString::sizeOf(const dwarf::FormParams &, dwarf::Form) const { // Emit string bytes + NULL byte. 
return S.size() + 1; } @@ -653,7 +658,8 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: case dwarf::DW_FORM_ref4: case dwarf::DW_FORM_ref8: - AP->OutStreamer->emitIntValue(Entry->getOffset(), SizeOf(AP, Form)); + AP->OutStreamer->emitIntValue(Entry->getOffset(), + sizeOf(AP->getDwarfFormParams(), Form)); return; case dwarf::DW_FORM_ref_udata: @@ -665,11 +671,12 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { uint64_t Addr = Entry->getDebugSectionOffset(); if (const MCSymbol *SectionSym = Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { - AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + AP->emitLabelPlusOffset(SectionSym, Addr, + sizeOf(AP->getDwarfFormParams(), Form), true); return; } - AP->OutStreamer->emitIntValue(Addr, SizeOf(AP, Form)); + AP->OutStreamer->emitIntValue(Addr, sizeOf(AP->getDwarfFormParams(), Form)); return; } default: @@ -677,7 +684,8 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { } } -unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEEntry::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_ref1: return 1; @@ -690,15 +698,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref_udata: return getULEB128Size(Entry->getOffset()); case dwarf::DW_FORM_ref_addr: - if (AP->getDwarfVersion() == 2) - return AP->MAI->getCodePointerSize(); - switch (AP->OutStreamer->getContext().getDwarfFormat()) { - case dwarf::DWARF32: - return 4; - case dwarf::DWARF64: - return 8; - } - llvm_unreachable("Invalid DWARF format"); + return FormParams.getRefAddrByteSize(); default: llvm_unreachable("Improper form for DIE reference"); @@ -714,12 +714,10 @@ void DIEEntry::print(raw_ostream &O) const { // DIELoc Implementation //===----------------------------------------------------------------------===// -/// ComputeSize - calculate the size of the location expression. -/// -unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { +unsigned DIELoc::computeSize(const dwarf::FormParams &FormParams) const { if (!Size) { for (const auto &V : values()) - Size += V.SizeOf(AP); + Size += V.sizeOf(FormParams); } return Size; @@ -743,9 +741,9 @@ void DIELoc::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { V.emitValue(Asm); } -/// SizeOf - Determine size of location data in bytes. +/// sizeOf - Determine size of location data in bytes. /// -unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELoc::sizeOf(const dwarf::FormParams &, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -766,12 +764,10 @@ void DIELoc::print(raw_ostream &O) const { // DIEBlock Implementation //===----------------------------------------------------------------------===// -/// ComputeSize - calculate the size of the block. -/// -unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { +unsigned DIEBlock::computeSize(const dwarf::FormParams &FormParams) const { if (!Size) { for (const auto &V : values()) - Size += V.SizeOf(AP); + Size += V.sizeOf(FormParams); } return Size; @@ -797,9 +793,9 @@ void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { V.emitValue(Asm); } -/// SizeOf - Determine size of block data in bytes. +/// sizeOf - Determine size of block data in bytes. 
/// -unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEBlock::sizeOf(const dwarf::FormParams &, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -820,22 +816,23 @@ void DIEBlock::print(raw_ostream &O) const { // DIELocList Implementation //===----------------------------------------------------------------------===// -unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELocList::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_loclistx: return getULEB128Size(Index); case dwarf::DW_FORM_data4: - assert(!AP->isDwarf64() && + assert(FormParams.Format != dwarf::DWARF64 && "DW_FORM_data4 is not suitable to emit a pointer to a location list " "in the 64-bit DWARF format"); return 4; case dwarf::DW_FORM_data8: - assert(AP->isDwarf64() && + assert(FormParams.Format == dwarf::DWARF64 && "DW_FORM_data8 is not suitable to emit a pointer to a location list " "in the 32-bit DWARF format"); return 8; case dwarf::DW_FORM_sec_offset: - return AP->getDwarfOffsetByteSize(); + return FormParams.getDwarfOffsetByteSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -860,9 +857,10 @@ void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; } // DIEAddrOffset Implementation //===----------------------------------------------------------------------===// -unsigned DIEAddrOffset::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - return Addr.SizeOf(AP, dwarf::DW_FORM_addrx) + - Offset.SizeOf(AP, dwarf::DW_FORM_data4); +unsigned DIEAddrOffset::sizeOf(const dwarf::FormParams &FormParams, + dwarf::Form) const { + return Addr.sizeOf(FormParams, dwarf::DW_FORM_addrx) + + Offset.sizeOf(FormParams, dwarf::DW_FORM_data4); } /// EmitValue - Emit label value. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 5f4ee747fcca..e175854f7b93 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -207,6 +207,18 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, computeHash(Entry); } +void DIEHash::hashRawTypeReference(const DIE &Entry) { + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + addULEB128('R'); + addULEB128(DieNumber); + return; + } + DieNumber = Numbering.size(); + addULEB128('T'); + computeHash(Entry); +} + // Hash all of the values in a block like set of values. This assumes that // all of the data is going to be added as integers. 
void DIEHash::hashBlockData(const DIE::const_value_range &Values) { @@ -298,10 +310,10 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { addULEB128(Attribute); addULEB128(dwarf::DW_FORM_block); if (Value.getType() == DIEValue::isBlock) { - addULEB128(Value.getDIEBlock().ComputeSize(AP)); + addULEB128(Value.getDIEBlock().computeSize(AP->getDwarfFormParams())); hashBlockData(Value.getDIEBlock().values()); } else if (Value.getType() == DIEValue::isLoc) { - addULEB128(Value.getDIELoc().ComputeSize(AP)); + addULEB128(Value.getDIELoc().computeSize(AP->getDwarfFormParams())); hashBlockData(Value.getDIELoc().values()); } else { // We could add the block length, but that would take diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 29e1da4c5d60..24a973b39271 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -62,6 +62,8 @@ public: /// Encodes and adds \param Value to the hash as a SLEB128. void addSLEB128(int64_t Value); + void hashRawTypeReference(const DIE &Entry); + private: /// Adds \param Str to the hash and includes a NULL byte. void addString(StringRef Str); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 4df34d2c9402..18fc46c74eb4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -155,7 +155,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type) + Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type && + Tag != dwarf::DW_TAG_immutable_type) return DDTy->getSizeInBits(); DIType *BaseType = DDTy->getBaseType(); @@ -210,7 +211,8 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); + T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type || + T == dwarf::DW_TAG_immutable_type); assert(DTy->getBaseType() && "Expected valid base type"); return isUnsignedDIType(DTy->getBaseType()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9b73f0ab2f05..5913c687db48 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -127,9 +127,14 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { if (!File) return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None, CUID); - return Asm->OutStreamer->emitDwarfFileDirective( - 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File), - File->getSource(), CUID); + + if (LastFile != File) { + LastFile = File; + LastFileID = Asm->OutStreamer->emitDwarfFileDirective( + 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File), + File->getSource(), CUID); + } + return LastFileID; } DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( @@ 
-260,9 +265,20 @@ void DwarfCompileUnit::addLocationAttribute( if (Global) { const MCSymbol *Sym = Asm->getSymbol(Global); - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); + // 16-bit platforms like MSP430 and AVR take this path, so sink this + // assert to platforms that use it. + auto GetPointerSizedFormAndOp = [this]() { + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + struct FormAndOp { + dwarf::Form Form; + dwarf::LocationAtom Op; + }; + return PointerSize == 4 + ? FormAndOp{dwarf::DW_FORM_data4, dwarf::DW_OP_const4u} + : FormAndOp{dwarf::DW_FORM_data8, dwarf::DW_OP_const8u}; + }; if (Global->isThreadLocal()) { if (Asm->TM.useEmulatedTLS()) { // TODO: add debug info for emulated thread local mode. @@ -270,15 +286,12 @@ void DwarfCompileUnit::addLocationAttribute( // FIXME: Make this work with -gsplit-dwarf. // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { + auto FormAndOp = GetPointerSizedFormAndOp(); // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); + addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(*Loc, - PointerSize == 4 ? dwarf::DW_FORM_data4 - : dwarf::DW_FORM_data8, + addExpr(*Loc, FormAndOp.Form, Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); @@ -292,13 +305,11 @@ void DwarfCompileUnit::addLocationAttribute( } } else if (Asm->TM.getRelocationModel() == Reloc::RWPI || Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) { + auto FormAndOp = GetPointerSizedFormAndOp(); // Constant - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); + addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op); // Relocation offset - addExpr(*Loc, PointerSize == 4 ? dwarf::DW_FORM_data4 - : dwarf::DW_FORM_data8, + addExpr(*Loc, FormAndOp.Form, Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym)); // Base register Register BaseReg = Asm->getObjFileLowering().getStaticBase(); @@ -1575,7 +1586,8 @@ void DwarfCompileUnit::createBaseTypeDIEs() { Twine(dwarf::AttributeEncodingString(Btr.Encoding) + "_" + Twine(Btr.BitSize)).toStringRef(Str)); addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding); - addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8); + // Round up to smallest number of bytes that contains this number of bits. + addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8)); Btr.Die = &Die; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index fb03982b5e4a..f2e1f6346803 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -86,6 +86,9 @@ class DwarfCompileUnit final : public DwarfUnit { /// DWO ID for correlating skeleton and split units. uint64_t DWOId = 0; + const DIFile *LastFile = nullptr; + unsigned LastFileID; + /// Construct a DIE for the given DbgVariable without initializing the /// DbgVariable's DIE reference. 
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 48134f1fd774..680b9586228f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2539,12 +2539,10 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Op.getDescription().Op[I] == Encoding::SizeNA) continue; if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) { - uint64_t Offset = - CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); - assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); - Streamer.emitULEB128(Offset, "", ULEB128PadSize); + unsigned Length = + Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die); // Make sure comments stay aligned. - for (unsigned J = 0; J < ULEB128PadSize; ++J) + for (unsigned J = 0; J < Length; ++J) if (Comment != End) Comment++; } else { @@ -3369,7 +3367,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. - if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) + if (!TypeUnitsUnderConstruction.empty() && + (AddrPool.hasBeenUsed() || SeenLocalType)) return; auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); @@ -3380,6 +3379,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); + SeenLocalType = false; auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, getDwoLineTable(CU)); @@ -3423,7 +3423,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // Types referencing entries in the address table cannot be placed in type // units. - if (AddrPool.hasBeenUsed()) { + if (AddrPool.hasBeenUsed() || SeenLocalType) { // Remove all the types built while building this type. // This is pessimistic as some of these types might not be dependent on @@ -3451,14 +3451,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), + AddrPoolUsed(DD->AddrPool.hasBeenUsed()), + SeenLocalType(DD->SeenLocalType) { DD->TypeUnitsUnderConstruction.clear(); DD->AddrPool.resetUsedFlag(); + DD->SeenLocalType = false; } DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); DD->AddrPool.resetUsedFlag(AddrPoolUsed); + DD->SeenLocalType = SeenLocalType; } DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4e1a1b1e068d..0043000652e8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -433,6 +433,7 @@ private: DenseMap<const DIStringType *, unsigned> StringTypeLocMap; AddressPool AddrPool; + bool SeenLocalType = false; /// Accelerator tables. 
AccelTable<DWARF5AccelTableData> AccelDebugNames; @@ -671,6 +672,7 @@ public: DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; bool AddrPoolUsed; + bool SeenLocalType; friend class DwarfDebug; NonTypeUnitContext(DwarfDebug *DD); public: @@ -679,6 +681,7 @@ public: }; NonTypeUnitContext enterNonTypeUnitContext(); + void seenLocalType() { SeenLocalType = true; } /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 40898c9fc855..4defa8a30855 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -98,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase { public: AIXException(AsmPrinter *A); + void markFunctionEnd() override; + void endModule() override {} void beginFunction(const MachineFunction *MF) override {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 37407c98e75f..ee932d105107 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -681,9 +681,25 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) { } void DwarfExpression::emitLegacyZExt(unsigned FromBits) { - // (X & (1 << FromBits - 1)) - emitOp(dwarf::DW_OP_constu); - emitUnsigned((1ULL << FromBits) - 1); + // Heuristic to decide the most efficient encoding. + // A ULEB can encode 7 1-bits per byte. + if (FromBits / 7 < 1+1+1+1+1) { + // (X & (1 << FromBits - 1)) + emitOp(dwarf::DW_OP_constu); + emitUnsigned((1ULL << FromBits) - 1); + } else { + // Note that the DWARF 4 stack consists of pointer-sized elements, + // so technically it doesn't make sense to shift left more than 64 + // bits. We leave that for the consumer to decide though. LLDB for + // example uses APInt for the stack elements and can still deal + // with this. + emitOp(dwarf::DW_OP_lit1); + emitOp(dwarf::DW_OP_constu); + emitUnsigned(FromBits); + emitOp(dwarf::DW_OP_shl); + emitOp(dwarf::DW_OP_lit1); + emitOp(dwarf::DW_OP_minus); + } emitOp(dwarf::DW_OP_and); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 838e1c9a10be..a67d0f032cf6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -92,7 +92,8 @@ unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { // Compute the size and offset of a DIE. The offset is relative to start of the // CU. It returns the offset after laying out the DIE. 
unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { - return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset); + return Die.computeOffsetsAndAbbrevs(Asm->getDwarfFormParams(), Abbrevs, + Offset); } void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 6b6d63f14f87..15d90c54adfc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -77,7 +77,7 @@ void DIEDwarfExpression::enableTemporaryBuffer() { void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } unsigned DIEDwarfExpression::getTemporaryBufferSize() { - return TmpDIE.ComputeSize(&AP); + return TmpDIE.computeSize(AP.getDwarfFormParams()); } void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); } @@ -394,14 +394,14 @@ DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) { } void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { - Loc->ComputeSize(Asm); + Loc->computeSize(Asm->getDwarfFormParams()); DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc); } void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, DIEBlock *Block) { - Block->ComputeSize(Asm); + Block->computeSize(Asm->getDwarfFormParams()); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. addAttribute(Die, Attribute, Form, Block); } @@ -597,10 +597,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, // Skip updating the accelerator tables since this is not the full type. if (MDString *TypeId = CTy->getRawIdentifier()) DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - else { - auto X = DD->enterNonTypeUnitContext(); + else finishNonUnitTypeDIE(TyDIE, CTy); - } return &TyDIE; } constructTypeDIE(TyDIE, CTy); @@ -744,6 +742,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) { addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } + if (DIExpression *Expr = STy->getStringLocationExp()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + // This is to describe the memory location of the + // string, so lock it down as such. + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Expr); + addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize()); + } + if (STy->getEncoding()) { // For eventual Unicode support. 
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, @@ -1189,7 +1197,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, DefinitionArgs = SP->getType()->getTypeArray(); if (DeclArgs.size() && DefinitionArgs.size()) - if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0]) + if (DefinitionArgs[0] != nullptr && DeclArgs[0] != DefinitionArgs[0]) addType(SPDie, DefinitionArgs[0]); DeclDie = getDIE(SPDecl); @@ -1842,5 +1850,25 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { StringRef Name = CTy->getName(); if (!Name.empty()) addString(D, dwarf::DW_AT_name, Name); + if (Name.startswith("_STN") || !Name.contains('<')) + addTemplateParams(D, CTy->getTemplateParams()); + // If the type is in an anonymous namespace, we can't reference it from a TU + // (since the type would be CU local and the TU doesn't specify which TU has + // the appropriate type definition) - so flag this emission as such and skip + // the rest of the emission now since we're going to throw out all this work + // and put the outer/referencing type in the CU instead. + // FIXME: Probably good to generalize this to a DICompositeType flag populated + // by the frontend, then we could use that to have types that can have + // decl+def merged by LTO but where the definition still doesn't go in a type + // unit because the type has only one definition. + for (DIScope *S = CTy->getScope(); S; S = S->getScope()) { + if (auto *NS = dyn_cast<DINamespace>(S)) { + if (NS->getName().empty()) { + DD->seenLocalType(); + break; + } + } + } + auto X = DD->enterNonTypeUnitContext(); getCU().createTypeDIE(CTy); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 54b0079dd7ce..330f3bacca43 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -25,9 +25,7 @@ namespace llvm { class ConstantFP; class ConstantInt; -class DbgVariable; class DwarfCompileUnit; -class MachineOperand; class MCDwarfDwoLineTable; class MCSymbol; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h index 7d5e51218693..a92a89084cad 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -19,8 +19,6 @@ namespace llvm { class AsmPrinter; -class MCStreamer; -class Module; class DILocation; class PseudoProbeHandler : public AsmPrinterHandler { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 1e3f33e70715..ad8432343a60 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -27,7 +27,7 @@ using namespace llvm; -WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} +WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {} WinCFGuard::~WinCFGuard() {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h index 2a4ea92a92aa..95d5dcfbbd0f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h @@ -23,7 +23,6 @@ class BasicBlock; class MachineBranchProbabilityInfo; class 
MachineFunction; class MachineLoopInfo; -class MachineModuleInfo; class MachineRegisterInfo; class MBFIWrapper; class ProfileSummaryInfo; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp index 1c2e3f998449..de173a9dfd62 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -347,7 +347,7 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { } if (ForceFullCFA) { - MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMoves( + MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMovesFullCFA( *MBBInfo.MBB, MBBI); InsertedCFIInstr = true; PrevMBBInfo = &MBBInfo; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp index 5f9982cd155d..84a0e4142bb6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -43,9 +43,9 @@ void VirtRegAuxInfo::calculateSpillWeightsAndHints() { } // Return the preferred allocation register for reg, given a COPY instruction. -static Register copyHint(const MachineInstr *MI, unsigned Reg, - const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI) { +Register VirtRegAuxInfo::copyHint(const MachineInstr *MI, unsigned Reg, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { unsigned Sub, HSub; Register HReg; if (MI->getOperand(0).getReg() == Reg) { @@ -77,9 +77,10 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg, } // Check if all values in LI are rematerializable -static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, - const VirtRegMap &VRM, - const TargetInstrInfo &TII) { +bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI, + const LiveIntervals &LIS, + const VirtRegMap &VRM, + const TargetInstrInfo &TII) { Register Reg = LI.reg(); Register Original = VRM.getOriginal(Reg); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index 747f4e4fdecc..28f24e5ea908 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4168,11 +4168,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // We can get through binary operator, if it is legal. In other words, the // binary operator must have a nuw or nsw flag. - const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - if (isa_and_nonnull<OverflowingBinaryOperator>(BinOp) && - ((!IsSExt && BinOp->hasNoUnsignedWrap()) || - (IsSExt && BinOp->hasNoSignedWrap()))) - return true; + if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst)) + if (isa<OverflowingBinaryOperator>(BinOp) && + ((!IsSExt && BinOp->hasNoUnsignedWrap()) || + (IsSExt && BinOp->hasNoSignedWrap()))) + return true; // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) if ((Inst->getOpcode() == Instruction::And || @@ -4181,10 +4181,10 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) if (Inst->getOpcode() == Instruction::Xor) { - const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)); // Make sure it is not a NOT. 
- if (Cst && !Cst->getValue().isAllOnes()) - return true; + if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1))) + if (!Cst->getValue().isAllOnes()) + return true; } // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp index 3bed81d5841d..1d50e1d22b95 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp @@ -90,7 +90,6 @@ CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) -CGOPT_EXP(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) CGOPT(bool, DebugStrictDwarf) @@ -433,12 +432,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(EnableDebugEntryValues); - static cl::opt<bool> ValueTrackingVariableLocations( - "experimental-debug-variable-locations", - cl::desc("Use experimental new value-tracking variable locations"), - cl::init(false)); - CGBINDOPT(ValueTrackingVariableLocations); - static cl::opt<bool> EnableMachineFunctionSplitter( "split-machine-functions", cl::desc("Split out cold basic blocks from machine functions based on " @@ -539,12 +532,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.DebugStrictDwarf = getDebugStrictDwarf(); Options.LoopAlignment = getAlignLoops(); - if (auto Opt = getExplicitValueTrackingVariableLocations()) - Options.ValueTrackingVariableLocations = *Opt; - else - Options.ValueTrackingVariableLocations = - getDefaultValueTrackingVariableLocations(TheTriple); - Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); @@ -620,7 +607,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, Function &F) { auto &Ctx = F.getContext(); AttributeList Attrs = F.getAttributes(); - AttrBuilder NewAttrs; + AttrBuilder NewAttrs(Ctx); if (!CPU.empty() && !F.hasFnAttribute("target-cpu")) NewAttrs.addAttribute("target-cpu", CPU); @@ -698,8 +685,3 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, setFunctionAttributes(CPU, Features, F); } -bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) { - if (T.getArch() == llvm::Triple::x86_64) - return true; - return false; -} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 901409ea9f8f..eb2d449bc4af 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -40,8 +40,7 @@ using namespace llvm; CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI) - : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), - TII(MF.getSubtarget().getInstrInfo()), + : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()), TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 7300ea6b50ee..d9caa8ad42d0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -68,9 +68,16 @@ void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) { MachineBasicBlock::iterator CopyMI = MI; --CopyMI; - for (const MachineOperand &MO : MI->implicit_operands()) - if (MO.isReg()) - CopyMI->addOperand(MO); + Register DstReg = MI->getOperand(0).getReg(); + for (const MachineOperand &MO : MI->implicit_operands()) { + CopyMI->addOperand(MO); + + // Be conservative about preserving kills when subregister defs are + // involved. If there was implicit kill of a super-register overlapping the + // copy result, we would kill the subregisters previous copies defined. + if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg())) + CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false); + } } bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 2676becdd807..1a642e233a6a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -191,10 +191,10 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid dsts"); if (SrcOps[0].getLLTTy(*getMRI()).isVector()) { // Try to constant fold vector constants. - auto VecCst = ConstantFoldVectorBinop( + Register VecCst = ConstantFoldVectorBinop( Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this); if (VecCst) - return MachineInstrBuilder(getMF(), *VecCst); + return buildCopy(DstOps[0], VecCst); break; } if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index d061664e8c5d..1ec7868f2234 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -86,6 +86,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, CallLoweringInfo Info; const DataLayout &DL = MIRBuilder.getDataLayout(); MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); bool CanBeTailCalled = CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) && (MF.getFunction() @@ -109,6 +110,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, CanBeTailCalled = false; } + // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. 
@@ -136,10 +138,23 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, else Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + Register ReturnHintAlignReg; + Align ReturnHintAlign; + Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}}; - if (!Info.OrigRet.Ty->isVoidTy()) + + if (!Info.OrigRet.Ty->isVoidTy()) { setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); + if (MaybeAlign Alignment = CB.getRetAlign()) { + if (*Alignment > Align(1)) { + ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]); + Info.OrigRet.Regs[0] = ReturnHintAlignReg; + ReturnHintAlign = *Alignment; + } + } + } + Info.CB = &CB; Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); Info.CallConv = CallConv; @@ -147,7 +162,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, Info.IsMustTailCall = CB.isMustTailCall(); Info.IsTailCall = CanBeTailCalled; Info.IsVarArg = IsVarArg; - return lowerCall(MIRBuilder, Info); + if (!lowerCall(MIRBuilder, Info)) + return false; + + if (ReturnHintAlignReg && !Info.IsTailCall) { + MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg, + ReturnHintAlign); + } + + return true; } template <typename FuncInfoTy> @@ -509,7 +532,8 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, bool CallLowering::determineAndHandleAssignments( ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const { + CallingConv::ID CallConv, bool IsVarArg, + ArrayRef<Register> ThisReturnRegs) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); SmallVector<CCValAssign, 16> ArgLocs; @@ -519,7 +543,7 @@ bool CallLowering::determineAndHandleAssignments( return false; return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder, - ThisReturnReg); + ThisReturnRegs); } static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) { @@ -596,7 +620,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, CCState &CCInfo, SmallVectorImpl<CCValAssign> &ArgLocs, MachineIRBuilder &MIRBuilder, - Register ThisReturnReg) const { + ArrayRef<Register> ThisReturnRegs) const { MachineFunction &MF = MIRBuilder.getMF(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); @@ -740,10 +764,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, assert(!VA.needsCustom() && "custom loc should have been handled already"); - if (i == 0 && ThisReturnReg.isValid() && + if (i == 0 && !ThisReturnRegs.empty() && Handler.isIncomingArgumentHandler() && isTypeIsValidForThisReturn(ValVT)) { - Handler.assignValueToReg(Args[i].Regs[i], ThisReturnReg, VA); + Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA); continue; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index dd1ef74e8ad0..30f8838805b5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -56,8 +56,7 @@ class WorkListMaintainer : public GISelChangeObserver { SmallPtrSet<const MachineInstr *, 4> CreatedInstrs; public: - WorkListMaintainer(WorkListTy &WorkList) - : GISelChangeObserver(), WorkList(WorkList) {} + WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {} virtual ~WorkListMaintainer() { } diff --git 
a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index f7a634dad61a..d6a009744161 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1748,6 +1748,20 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, MI.eraseFromParent(); } +bool CombinerHelper::matchCombineUnmergeUndef( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + unsigned SrcIdx = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + MatchInfo = [&MI](MachineIRBuilder &B) { + unsigned NumElems = MI.getNumOperands() - 1; + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + B.buildUndef(DstReg); + } + }; + return isa<GImplicitDef>(MRI.getVRegDef(SrcReg)); +} + bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -2025,16 +2039,19 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd( } bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { + APInt &NewCst) { auto &PtrAdd = cast<GPtrAdd>(MI); Register LHS = PtrAdd.getBaseReg(); Register RHS = PtrAdd.getOffsetReg(); MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); - if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) { - int64_t Cst; + if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) { + APInt Cst; if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { - NewCst = Cst + *RHSCst; + auto DstTy = MRI.getType(PtrAdd.getReg(0)); + // G_INTTOPTR uses zero-extension + NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits()); + NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits()); return true; } } @@ -2043,7 +2060,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, } void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { + APInt &NewCst) { auto &PtrAdd = cast<GPtrAdd>(MI); Register Dst = PtrAdd.getReg(0); @@ -3875,39 +3892,48 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, LLT Ty = MRI.getType(Dst); unsigned BitWidth = Ty.getScalarSizeInBits(); - Register ShlSrc, ShlAmt, LShrSrc, LShrAmt; + Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt; unsigned FshOpc = 0; - // Match (or (shl x, amt), (lshr y, sub(bw, amt))). - if (mi_match( - Dst, MRI, - // m_GOr() handles the commuted version as well. - m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), - m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth), - m_Reg(LShrAmt)))))) { + // Match (or (shl ...), (lshr ...)). + if (!mi_match(Dst, MRI, + // m_GOr() handles the commuted version as well. + m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), + m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt))))) + return false; + + // Given constants C0 and C1 such that C0 + C1 is bit-width: + // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1) + // TODO: Match constant splat. 
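To make the constant case above concrete: for a 32-bit type, with C0 = 8 and C1 = 24 (so C0 + C1 equals the bit width), (x << 8) | (y >> 24) is exactly fshl(x, y, 8), or equivalently fshr(x, y, 24). A small self-contained check of that identity in plain C++ (illustration only; fshl32 and fshr32 are hypothetical helpers, not LLVM APIs):

    #include <cassert>
    #include <cstdint>

    // fshl concatenates Hi:Lo, shifts left by Amt, and keeps the high word:
    // (Hi << Amt) | (Lo >> (32 - Amt)) for a non-zero shift amount.
    static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      Amt &= 31;
      return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
    }

    // fshr keeps the low word of the same concatenation shifted right by Amt.
    static uint32_t fshr32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      Amt &= 31;
      return Amt ? (Hi << (32 - Amt)) | (Lo >> Amt) : Lo;
    }

    int main() {
      uint32_t X = 0x12345678u, Y = 0x9abcdef0u;
      assert(((X << 8) | (Y >> 24)) == fshl32(X, Y, 8));  // C0 = 8
      assert(((X << 8) | (Y >> 24)) == fshr32(X, Y, 24)); // C1 = 24
      return 0;
    }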
+ int64_t CstShlAmt, CstLShrAmt; + if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) && + mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) && + CstShlAmt + CstLShrAmt == BitWidth) { + FshOpc = TargetOpcode::G_FSHR; + Amt = LShrAmt; + + } else if (mi_match(LShrAmt, MRI, + m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) && + ShlAmt == Amt) { + // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt) FshOpc = TargetOpcode::G_FSHL; - // Match (or (shl x, sub(bw, amt)), (lshr y, amt)). - } else if (mi_match(Dst, MRI, - m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)), - m_GShl(m_Reg(ShlSrc), - m_GSub(m_SpecificICstOrSplat(BitWidth), - m_Reg(ShlAmt)))))) { + } else if (mi_match(ShlAmt, MRI, + m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) && + LShrAmt == Amt) { + // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt) FshOpc = TargetOpcode::G_FSHR; } else { return false; } - if (ShlAmt != LShrAmt) - return false; - - LLT AmtTy = MRI.getType(ShlAmt); + LLT AmtTy = MRI.getType(Amt); if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}})) return false; MatchInfo = [=](MachineIRBuilder &B) { - B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt}); + B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt}); }; return true; } @@ -4127,8 +4153,9 @@ bool CombinerHelper::matchBitfieldExtractFromAnd( assert(MI.getOpcode() == TargetOpcode::G_AND); Register Dst = MI.getOperand(0).getReg(); LLT Ty = MRI.getType(Dst); - if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( - TargetOpcode::G_UBFX, Ty, Ty)) + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal( + TargetOpcode::G_UBFX, Ty, ExtractTy)) return false; int64_t AndImm, LSBImm; @@ -4148,7 +4175,6 @@ bool CombinerHelper::matchBitfieldExtractFromAnd( if (static_cast<uint64_t>(LSBImm) >= Size) return false; - LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); uint64_t Width = APInt(Size, AndImm).countTrailingOnes(); MatchInfo = [=](MachineIRBuilder &B) { auto WidthCst = B.buildConstant(ExtractTy, Width); @@ -4214,8 +4240,9 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd( const Register Dst = MI.getOperand(0).getReg(); LLT Ty = MRI.getType(Dst); - if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( - TargetOpcode::G_UBFX, Ty, Ty)) + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal( + TargetOpcode::G_UBFX, Ty, ExtractTy)) return false; // Try to match shr (and x, c1), c2 @@ -4249,8 +4276,8 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd( return false; MatchInfo = [=](MachineIRBuilder &B) { - auto WidthCst = B.buildConstant(Ty, Width); - auto PosCst = B.buildConstant(Ty, Pos); + auto WidthCst = B.buildConstant(ExtractTy, Width); + auto PosCst = B.buildConstant(ExtractTy, Pos); B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst}); }; return true; @@ -4850,37 +4877,39 @@ bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) return false; - MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); - MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + Register Op1 = MI.getOperand(1).getReg(); + Register Op2 = MI.getOperand(2).getReg(); + DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1}; + DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2}; unsigned PreferredFusedOpcode = HasFMAD ? 
TargetOpcode::G_FMAD : TargetOpcode::G_FMA; // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. - if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && - isContractableFMul(*RHS, AllowFusionGlobally)) { - if (hasMoreUses(*LHS, *RHS, MRI)) + if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) && + isContractableFMul(*RHS.MI, AllowFusionGlobally)) { + if (hasMoreUses(*LHS.MI, *RHS.MI, MRI)) std::swap(LHS, RHS); } // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (isContractableFMul(*LHS, AllowFusionGlobally) && - (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) { + if (isContractableFMul(*LHS.MI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), - RHS->getOperand(0).getReg()}); + {LHS.MI->getOperand(1).getReg(), + LHS.MI->getOperand(2).getReg(), RHS.Reg}); }; return true; } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - if (isContractableFMul(*RHS, AllowFusionGlobally) && - (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) { + if (isContractableFMul(*RHS.MI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(), - LHS->getOperand(0).getReg()}); + {RHS.MI->getOperand(1).getReg(), + RHS.MI->getOperand(2).getReg(), LHS.Reg}); }; return true; } @@ -4897,8 +4926,10 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( return false; const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); - MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); - MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + Register Op1 = MI.getOperand(1).getReg(); + Register Op2 = MI.getOperand(2).getReg(); + DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1}; + DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2}; LLT DstType = MRI.getType(MI.getOperand(0).getReg()); unsigned PreferredFusedOpcode = @@ -4906,42 +4937,38 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. 
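For readers less familiar with these combines, the scalar shapes matched in this block of hunks are fadd (fmul x, y), z into fma x, y, z and, with the operands commuted, fadd x, (fmul y, z) into fma y, z, x. The fused form rounds only once, which is why the combine is gated on contraction being allowed. A tiny illustration using the C library's fma (plain C++, not the GlobalISel code path):

    #include <cmath>

    // The algebraic shape of the folds; results can differ from the unfused
    // expression in the last bit because fma rounds a single time.
    double fold_fadd_fmul_lhs(double x, double y, double z) { return std::fma(x, y, z); }
    double fold_fadd_fmul_rhs(double x, double y, double z) { return std::fma(y, z, x); }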
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && - isContractableFMul(*RHS, AllowFusionGlobally)) { - if (hasMoreUses(*LHS, *RHS, MRI)) + if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) && + isContractableFMul(*RHS.MI, AllowFusionGlobally)) { + if (hasMoreUses(*LHS.MI, *RHS.MI, MRI)) std::swap(LHS, RHS); } // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) MachineInstr *FpExtSrc; - if (mi_match(LHS->getOperand(0).getReg(), MRI, - m_GFPExt(m_MInstr(FpExtSrc))) && + if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) && isContractableFMul(*FpExtSrc, AllowFusionGlobally) && TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, MRI.getType(FpExtSrc->getOperand(1).getReg()))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); - B.buildInstr( - PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()}); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg}); }; return true; } // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z) // Note: Commutes FADD operands. - if (mi_match(RHS->getOperand(0).getReg(), MRI, - m_GFPExt(m_MInstr(FpExtSrc))) && + if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) && isContractableFMul(*FpExtSrc, AllowFusionGlobally) && TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, MRI.getType(FpExtSrc->getOperand(1).getReg()))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); - B.buildInstr( - PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()}); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg}); }; return true; } @@ -4957,8 +4984,10 @@ bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true)) return false; - MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); - MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + Register Op1 = MI.getOperand(1).getReg(); + Register Op2 = MI.getOperand(2).getReg(); + DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1}; + DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2}; LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); unsigned PreferredFusedOpcode = @@ -4966,31 +4995,31 @@ bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. 
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && - isContractableFMul(*RHS, AllowFusionGlobally)) { - if (hasMoreUses(*LHS, *RHS, MRI)) + if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) && + isContractableFMul(*RHS.MI, AllowFusionGlobally)) { + if (hasMoreUses(*LHS.MI, *RHS.MI, MRI)) std::swap(LHS, RHS); } MachineInstr *FMA = nullptr; Register Z; // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) - if (LHS->getOpcode() == PreferredFusedOpcode && - (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() == + if (LHS.MI->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() == TargetOpcode::G_FMUL) && - MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) && - MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) { - FMA = LHS; - Z = RHS->getOperand(0).getReg(); + MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) { + FMA = LHS.MI; + Z = RHS.Reg; } // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z)) - else if (RHS->getOpcode() == PreferredFusedOpcode && - (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() == + else if (RHS.MI->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() == TargetOpcode::G_FMUL) && - MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) && - MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) { - Z = LHS->getOperand(0).getReg(); - FMA = RHS; + MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) { + Z = LHS.Reg; + FMA = RHS.MI; } if (FMA) { @@ -5025,17 +5054,19 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); LLT DstType = MRI.getType(MI.getOperand(0).getReg()); - MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); - MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + Register Op1 = MI.getOperand(1).getReg(); + Register Op2 = MI.getOperand(2).getReg(); + DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1}; + DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2}; unsigned PreferredFusedOpcode = HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. 
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && - isContractableFMul(*RHS, AllowFusionGlobally)) { - if (hasMoreUses(*LHS, *RHS, MRI)) + if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) && + isContractableFMul(*RHS.MI, AllowFusionGlobally)) { + if (hasMoreUses(*LHS.MI, *RHS.MI, MRI)) std::swap(LHS, RHS); } @@ -5054,16 +5085,17 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( MachineInstr *FMulMI, *FMAMI; // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) - if (LHS->getOpcode() == PreferredFusedOpcode && - mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + if (LHS.MI->getOpcode() == PreferredFusedOpcode && + mi_match(LHS.MI->getOperand(3).getReg(), MRI, + m_GFPExt(m_MInstr(FMulMI))) && isContractableFMul(*FMulMI, AllowFusionGlobally) && TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, MRI.getType(FMulMI->getOperand(0).getReg()))) { MatchInfo = [=](MachineIRBuilder &B) { buildMatchInfo(FMulMI->getOperand(1).getReg(), - FMulMI->getOperand(2).getReg(), - RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(), - LHS->getOperand(2).getReg(), B); + FMulMI->getOperand(2).getReg(), RHS.Reg, + LHS.MI->getOperand(1).getReg(), + LHS.MI->getOperand(2).getReg(), B); }; return true; } @@ -5073,7 +5105,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. - if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) && FMAMI->getOpcode() == PreferredFusedOpcode) { MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); if (isContractableFMul(*FMulMI, AllowFusionGlobally) && @@ -5085,8 +5117,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( X = B.buildFPExt(DstType, X).getReg(0); Y = B.buildFPExt(DstType, Y).getReg(0); buildMatchInfo(FMulMI->getOperand(1).getReg(), - FMulMI->getOperand(2).getReg(), - RHS->getOperand(0).getReg(), X, Y, B); + FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B); }; return true; @@ -5095,16 +5126,17 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( // fold (fadd z, (fma x, y, (fpext (fmul u, v))) // -> (fma x, y, (fma (fpext u), (fpext v), z)) - if (RHS->getOpcode() == PreferredFusedOpcode && - mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + if (RHS.MI->getOpcode() == PreferredFusedOpcode && + mi_match(RHS.MI->getOperand(3).getReg(), MRI, + m_GFPExt(m_MInstr(FMulMI))) && isContractableFMul(*FMulMI, AllowFusionGlobally) && TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, MRI.getType(FMulMI->getOperand(0).getReg()))) { MatchInfo = [=](MachineIRBuilder &B) { buildMatchInfo(FMulMI->getOperand(1).getReg(), - FMulMI->getOperand(2).getReg(), - LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(), - RHS->getOperand(2).getReg(), B); + FMulMI->getOperand(2).getReg(), LHS.Reg, + RHS.MI->getOperand(1).getReg(), + RHS.MI->getOperand(2).getReg(), B); }; return true; } @@ -5114,7 +5146,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. 
- if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) && FMAMI->getOpcode() == PreferredFusedOpcode) { MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); if (isContractableFMul(*FMulMI, AllowFusionGlobally) && @@ -5126,8 +5158,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( X = B.buildFPExt(DstType, X).getReg(0); Y = B.buildFPExt(DstType, Y).getReg(0); buildMatchInfo(FMulMI->getOperand(1).getReg(), - FMulMI->getOperand(2).getReg(), - LHS->getOperand(0).getReg(), X, Y, B); + FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B); }; return true; } @@ -5144,16 +5175,18 @@ bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) return false; - MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); - MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + Register Op1 = MI.getOperand(1).getReg(); + Register Op2 = MI.getOperand(2).getReg(); + DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1}; + DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2}; LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. int FirstMulHasFewerUses = true; - if (isContractableFMul(*LHS, AllowFusionGlobally) && - isContractableFMul(*RHS, AllowFusionGlobally) && - hasMoreUses(*LHS, *RHS, MRI)) + if (isContractableFMul(*LHS.MI, AllowFusionGlobally) && + isContractableFMul(*RHS.MI, AllowFusionGlobally) && + hasMoreUses(*LHS.MI, *RHS.MI, MRI)) FirstMulHasFewerUses = false; unsigned PreferredFusedOpcode = @@ -5161,24 +5194,24 @@ bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( // fold (fsub (fmul x, y), z) -> (fma x, y, -z) if (FirstMulHasFewerUses && - (isContractableFMul(*LHS, AllowFusionGlobally) && - (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) { + (isContractableFMul(*LHS.MI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { - Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0); - B.buildInstr( - PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ}); + Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS.MI->getOperand(1).getReg(), + LHS.MI->getOperand(2).getReg(), NegZ}); }; return true; } // fold (fsub x, (fmul y, z)) -> (fma -y, z, x) - else if ((isContractableFMul(*RHS, AllowFusionGlobally) && - (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) { + else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { - Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0); - B.buildInstr( - PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()}); + Register NegY = + B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg}); }; return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 
306af808659a..64c2f0d5f8e4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -37,6 +37,11 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { switch (MI->getOpcode()) { case TargetOpcode::COPY: return computeKnownAlignment(MI->getOperand(1).getReg(), Depth); + case TargetOpcode::G_ASSERT_ALIGN: { + // TODO: Min with source + int64_t LogAlign = MI->getOperand(2).getImm(); + return Align(1ull << LogAlign); + } case TargetOpcode::G_FRAME_INDEX: { int FrameIdx = MI->getOperand(1).getIndex(); return MF.getFrameInfo().getObjectAlign(FrameIdx); @@ -466,6 +471,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero.setBitsFrom(SrcBitWidth); break; } + case TargetOpcode::G_ASSERT_ALIGN: { + int64_t LogOfAlign = MI.getOperand(2).getImm(); + if (LogOfAlign == 0) + break; + + // TODO: Should use maximum with source + // If a node is guaranteed to be aligned, set low zero bits accordingly as + // well as clearing one bits. + Known.Zero.setLowBits(LogOfAlign); + Known.One.clearLowBits(LogOfAlign); + break; + } case TargetOpcode::G_MERGE_VALUES: { unsigned NumOps = MI.getNumOperands(); unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 4ae427484945..e5f95ca5aa73 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -297,10 +297,8 @@ bool InlineAsmLowering::lowerInlineAsm( GISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); // Compute the value type for each operand. - if (OpInfo.Type == InlineAsm::isInput || - (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { - - OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo++)); + if (OpInfo.hasArg()) { + OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo)); if (isa<BasicBlock>(OpInfo.CallOperandVal)) { LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n"); @@ -312,10 +310,8 @@ bool InlineAsmLowering::lowerInlineAsm( // If this is an indirect operand, the operand is a pointer to the // accessed type. 
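Two editorial notes on the changes nearby. First, the G_ASSERT_ALIGN handling in GISelKnownBits above encodes the usual fact that an alignment guarantee is a statement about low bits: a value aligned to 1 << LogOfAlign has its LogOfAlign lowest bits known to be zero. A self-contained check of that relationship (plain C++, illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned LogOfAlign = 4;                      // an Align(16) guarantee
      const uint64_t LowMask = (uint64_t{1} << LogOfAlign) - 1;
      uint64_t Aligned = 0x12340;                         // any multiple of 16
      assert((Aligned & LowMask) == 0);                   // low LogOfAlign bits are zero
      return 0;
    }

Second, the indirect-operand handling just below reflects the move to opaque pointers: the pointee type of an indirect inline asm operand can no longer be read off the pointer type, so it is taken from the call site's elementtype parameter attribute instead, and the assert documents that verified IR is expected to carry it.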
if (OpInfo.isIndirect) { - PointerType *PtrTy = dyn_cast<PointerType>(OpTy); - if (!PtrTy) - report_fatal_error("Indirect operand for inline asm not a pointer!"); - OpTy = PtrTy->getElementType(); + OpTy = Call.getAttributes().getParamElementType(ArgNo); + assert(OpTy && "Indirect operand must have elementtype attribute"); } // FIXME: Support aggregate input operands @@ -327,7 +323,7 @@ bool InlineAsmLowering::lowerInlineAsm( OpInfo.ConstraintVT = TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT(); - + ++ArgNo; } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(Call.getType())) { @@ -627,7 +623,8 @@ bool InlineAsmLowering::lowerInlineAsm( Register SrcReg = OpInfo.Regs[0]; unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI); - if (MRI->getType(ResRegs[i]).getSizeInBits() < SrcSize) { + LLT ResTy = MRI->getType(ResRegs[i]); + if (ResTy.isScalar() && ResTy.getSizeInBits() < SrcSize) { // First copy the non-typed virtual register into a generic virtual // register Register Tmp1Reg = @@ -635,9 +632,14 @@ bool InlineAsmLowering::lowerInlineAsm( MIRBuilder.buildCopy(Tmp1Reg, SrcReg); // Need to truncate the result of the register MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg); - } else { + } else if (ResTy.getSizeInBits() == SrcSize) { MIRBuilder.buildCopy(ResRegs[i], SrcReg); + } else { + LLVM_DEBUG(dbgs() << "Unhandled output operand with " + "mismatched register size\n"); + return false; } + break; } case TargetLowering::C_Immediate: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index b10c9272a508..2bb5addefe48 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -71,9 +71,10 @@ InstructionSelect::InstructionSelect() void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + if (OptLevel != CodeGenOpt::None) { - AU.addRequired<GISelKnownBitsAnalysis>(); - AU.addPreserved<GISelKnownBitsAnalysis>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } @@ -97,9 +98,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { OptLevel = MF.getFunction().hasOptNone() ? 
CodeGenOpt::None : MF.getTarget().getOptLevel(); - GISelKnownBits *KB = nullptr; + GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); if (OptLevel != CodeGenOpt::None) { - KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (PSI && PSI->hasProfileSummary()) BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index dc5a4d8f85aa..1d0c106fd5db 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -29,7 +29,7 @@ using namespace llvm; InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers), MIs() {} + : Renderers(MaxRenderers) {} InstructionSelector::InstructionSelector() = default; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e8a8efd5dad4..37bc8a65dc7c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -564,7 +564,7 @@ static bool isLibCallInTailPosition(MachineInstr &MI, // the return. Ignore NoAlias and NonNull because they don't affect the // call sequence. AttributeList CallerAttrs = F.getAttributes(); - if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) + if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs()) .removeAttribute(Attribute::NoAlias) .removeAttribute(Attribute::NonNull) .hasAttributes()) @@ -1677,7 +1677,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, // Widen SrcTy to WideTy. This does not affect the result, but since the // user requested this size, it is probably better handled than SrcTy and - // should reduce the total number of legalization artifacts + // should reduce the total number of legalization artifacts. if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { SrcTy = WideTy; SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0); @@ -3655,7 +3655,6 @@ static bool hasSameNumEltsOnAllVectorOperands( if (!Ty.isVector()) { if (!is_contained(NonVecOpIndices, OpIdx)) return false; - is_contained(NonVecOpIndices, OpIdx); continue; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index a1acc4195840..328a278f3d68 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -124,14 +124,13 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedInstrs.insert(LocalizedMI); MachineInstr &UseMI = *MOUse.getParent(); if (MRI->hasOneUse(Reg) && !UseMI.isPHI()) - InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI); + InsertMBB->insert(UseMI, LocalizedMI); else InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), LocalizedMI); // Set a new register for the definition. 
- Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); - MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); + Register NewReg = MRI->cloneVirtualRegister(Reg); LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; @@ -174,9 +173,10 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { while (II != MBB.end() && !Users.count(&*II)) ++II; - LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II - << "\n"); assert(II != MBB.end() && "Didn't find the user in the MBB"); + LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II + << '\n'); + MI->removeFromParent(); MBB.insert(II, MI); Changed = true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 391251886fbb..c6720568b362 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -282,18 +282,6 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res, return buildInstr(TargetOpcode::COPY, Res, Op); } -MachineInstrBuilder MachineIRBuilder::buildAssertSExt(const DstOp &Res, - const SrcOp &Op, - unsigned Size) { - return buildInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op).addImm(Size); -} - -MachineInstrBuilder MachineIRBuilder::buildAssertZExt(const DstOp &Res, - const SrcOp &Op, - unsigned Size) { - return buildInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op).addImm(Size); -} - MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, const ConstantInt &Val) { LLT Ty = Res.getLLTTy(*getMRI()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 937d94764be1..01af6bb51bb7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -626,7 +626,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); if (isPreISelGenericOptimizationHint(Opc)) { assert((Opc == TargetOpcode::G_ASSERT_ZEXT || - Opc == TargetOpcode::G_ASSERT_SEXT) && + Opc == TargetOpcode::G_ASSERT_SEXT || + Opc == TargetOpcode::G_ASSERT_ALIGN) && "Unexpected hint opcode!"); // The only correct mapping for these is to always use the source register // bank. @@ -856,7 +857,7 @@ void RegBankSelect::RepairingPlacement::addInsertPoint( RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr, bool Before) - : InsertPoint(), Instr(Instr), Before(Before) { + : Instr(Instr), Before(Before) { // Since we do not support splitting, we do not need to update // liveness and such, so do not do anything with P. 
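The Localizer hunk above, like the CallLowering change earlier in this diff, replaces the two-step create-then-copy-attributes pattern with MRI->cloneVirtualRegister(Reg). In terms of the calls visible in the removed lines, the shorthand stands in for roughly the following (sketch only, on the assumption that cloneVirtualRegister reproduces the type and the register class or bank of the original register):

    // What the removed lines spelled out by hand: make a new virtual register
    // with the same LLT and the same register class or bank as Reg.
    Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
    MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));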
assert((!Before || !Instr.isPHI()) && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 4981a537dc7c..544af9a2954f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -592,17 +592,17 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, return None; } -Optional<MachineInstr *> -llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB) { - auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); - if (!SrcVec1) - return None; +Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI, + MachineIRBuilder &MIB) { auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI); if (!SrcVec2) - return None; + return Register(); + + auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); + if (!SrcVec1) + return Register(); const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); @@ -611,14 +611,14 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), SrcVec2->getSourceReg(Idx), MRI); if (!MaybeCst) - return None; + return Register(); auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); FoldedElements.emplace_back(FoldedCstReg); } // Create the new vector constant. auto CstVec = MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); - return &*CstVec; + return CstVec.getReg(0); } bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -704,8 +704,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, - LLT RegTy) { - DebugLoc DL; // FIXME: Is no location the right choice? + const DebugLoc &DL, LLT RegTy) { MachineBasicBlock &EntryMBB = MF.front(); MachineRegisterInfo &MRI = MF.getRegInfo(); Register LiveIn = MRI.getLiveInVirtReg(PhysReg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 9fabcfb1f326..2ee9379cb286 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -185,7 +185,7 @@ class Polynomial { APInt A; public: - Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() { + Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) { IntegerType *Ty = dyn_cast<IntegerType>(V->getType()); if (Ty) { ErrorMSBs = 0; @@ -195,12 +195,12 @@ public: } Polynomial(const APInt &A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {} Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(BitWidth, A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {} - Polynomial() : ErrorMSBs((unsigned)-1), V(NULL), B(), A() {} + Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {} /// Increment and clamp the number of undefined bits. 
void incErrorMSBs(unsigned amt) { @@ -677,7 +677,7 @@ public: FixedVectorType *const VTy; VectorInfo(FixedVectorType *VTy) - : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { + : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index e97dcca201e8..8a190e769941 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -251,9 +251,10 @@ public: /// creates DBG_VALUEs and puts them in #Transfers, then prepares the other /// object fields to track variable locations as we step through the block. /// FIXME: could just examine mloctracker instead of passing in \p mlocs? - void loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs, - SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs, - unsigned NumLocs) { + void + loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs, + const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs, + unsigned NumLocs) { ActiveMLocs.clear(); ActiveVLocs.clear(); VarLocs.clear(); @@ -272,7 +273,7 @@ public: }; // Map of the preferred location for each value. - std::map<ValueIDNum, LocIdx> ValueToLoc; + DenseMap<ValueIDNum, LocIdx> ValueToLoc; ActiveMLocs.reserve(VLocs.size()); ActiveVLocs.reserve(VLocs.size()); @@ -283,6 +284,11 @@ public: LocIdx Idx = Location.Idx; ValueIDNum &VNum = MLocs[Idx.asU64()]; VarLocs.push_back(VNum); + + // Short-circuit unnecessary preferred location update. + if (VLocs.empty()) + continue; + auto it = ValueToLoc.find(VNum); // In order of preference, pick: // * Callee saved registers, @@ -298,7 +304,7 @@ public: } // Now map variables to their picked LocIdxes. - for (auto Var : VLocs) { + for (const auto &Var : VLocs) { if (Var.second.Kind == DbgValue::Const) { PendingDbgValues.push_back( emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties)); @@ -413,7 +419,8 @@ public: return Reg != SP && Reg != FP; } - bool recoverAsEntryValue(const DebugVariable &Var, DbgValueProperties &Prop, + bool recoverAsEntryValue(const DebugVariable &Var, + const DbgValueProperties &Prop, const ValueIDNum &Num) { // Is this variable location a candidate to be an entry value. First, // should we be trying this at all? @@ -2799,31 +2806,28 @@ void InstrRefBasedLDV::emitLocations( } } - // We have to insert DBG_VALUEs in a consistent order, otherwise they appeaer - // in DWARF in different orders. Use the order that they appear when walking - // through each block / each instruction, stored in AllVarsNumbering. - auto OrderDbgValues = [&](const MachineInstr *A, - const MachineInstr *B) -> bool { - DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(), - A->getDebugLoc()->getInlinedAt()); - DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(), - B->getDebugLoc()->getInlinedAt()); - return AllVarsNumbering.find(VarA)->second < - AllVarsNumbering.find(VarB)->second; - }; - // Go through all the transfers recorded in the TransferTracker -- this is // both the live-ins to a block, and any movements of values that happen // in the middle. - for (auto &P : TTracker->Transfers) { - // Sort them according to appearance order. 
- llvm::sort(P.Insts, OrderDbgValues); + for (const auto &P : TTracker->Transfers) { + // We have to insert DBG_VALUEs in a consistent order, otherwise they + // appear in DWARF in different orders. Use the order that they appear + // when walking through each block / each instruction, stored in + // AllVarsNumbering. + SmallVector<std::pair<unsigned, MachineInstr *>> Insts; + for (MachineInstr *MI : P.Insts) { + DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), + MI->getDebugLoc()->getInlinedAt()); + Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI); + } + llvm::sort(Insts, + [](const auto &A, const auto &B) { return A.first < B.first; }); + // Insert either before or after the designated point... if (P.MBB) { MachineBasicBlock &MBB = *P.MBB; - for (auto *MI : P.Insts) { - MBB.insert(P.Pos, MI); - } + for (const auto &Pair : Insts) + MBB.insert(P.Pos, Pair.second); } else { // Terminators, like tail calls, can clobber things. Don't try and place // transfers after them. @@ -2831,9 +2835,8 @@ void InstrRefBasedLDV::emitLocations( continue; MachineBasicBlock &MBB = *P.Pos->getParent(); - for (auto *MI : P.Insts) { - MBB.insertAfterBundle(P.Pos, MI); - } + for (const auto &Pair : Insts) + MBB.insertAfterBundle(P.Pos, Pair.second); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index 789205e61cdb..9e9c0ce394fd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -494,7 +494,7 @@ public: return StackIdxesToPos.find(Idx)->second; } - unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); } + unsigned getNumLocs() const { return LocIdxToIDNum.size(); } /// Reset all locations to contain a PHI value at the designated block. Used /// sometimes for actual PHI values, othertimes to indicate the block entry @@ -516,7 +516,7 @@ public: } /// Wipe any un-necessary location records after traversing a block. - void reset(void) { + void reset() { // We could reset all the location values too; however either loadFromArray // or setMPhis should be called before this object is re-used. Just // clear Masks, they're definitely not needed. @@ -525,7 +525,7 @@ public: /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of /// the information in this pass uninterpretable. 
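The emitLocations rewrite above is a decorate-and-sort: instead of a comparator that performs two map lookups per comparison, each DBG_VALUE is paired up front with its precomputed ordering number and the pairs are sorted by that key. A reduced, self-contained example of the pattern (plain C++, with made-up data):

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // Decorate: attach the precomputed key, then sort by the key alone.
      std::vector<std::pair<unsigned, const char *>> Insts = {
          {2, "DBG_VALUE b"}, {0, "DBG_VALUE a"}, {1, "DBG_VALUE c"}};
      std::sort(Insts.begin(), Insts.end(),
                [](const auto &A, const auto &B) { return A.first < B.first; });
      for (const auto &P : Insts)
        std::printf("%u %s\n", P.first, P.second);
      return 0;
    }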
- void clear(void) { + void clear() { reset(); LocIDToLocIdx.clear(); LocIdxToLocID.clear(); @@ -1082,7 +1082,9 @@ template <> struct DenseMapInfo<ValueIDNum> { return ValueIDNum::TombstoneValue; } - static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); } + static unsigned getHashValue(const ValueIDNum &Val) { + return hash_value(Val.asU64()); + } static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) { return A == B; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 691977dc34e6..8f697611a82c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -40,6 +40,10 @@ static cl::opt<bool> "normal DBG_VALUE inputs"), cl::init(false)); +static cl::opt<cl::boolOrDefault> ValueTrackingVariableLocations( + "experimental-debug-variable-locations", + cl::desc("Use experimental new value-tracking variable locations")); + // Options to prevent pathological compile-time behavior. If InputBBLimit and // InputDbgValueLimit are both exceeded, range extension is disabled. static cl::opt<unsigned> InputBBLimit( @@ -117,3 +121,8 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit, InputDbgValueLimit); } + +bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) { + // Enable if explicitly requested on command line. + return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h index a5936c8a96f0..8f0b2ec3e1fc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -12,6 +12,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/ADT/Triple.h" namespace llvm { @@ -35,6 +36,9 @@ public: // Factory functions for LiveDebugValues implementations. 
extern LDVImpl *makeVarLocBasedLiveDebugValues(); extern LDVImpl *makeInstrRefBasedLiveDebugValues(); + +extern bool debuginfoShouldUseDebugInstrRef(const Triple &T); + } // namespace llvm #endif // LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index e6661e5135c3..6d806135240e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -152,7 +152,7 @@ public: } } - DbgVariableValue() : LocNoCount(0), WasIndirect(0), WasList(0) {} + DbgVariableValue() : LocNoCount(0), WasIndirect(false), WasList(false) {} DbgVariableValue(const DbgVariableValue &Other) : LocNoCount(Other.LocNoCount), WasIndirect(Other.getWasIndirect()), WasList(Other.getWasList()), Expression(Other.getExpression()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp index 2f97386b6d18..9571afa434c1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp @@ -827,6 +827,8 @@ CancelKill: MachineBasicBlock* LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { + assert(!LI.empty() && "LiveInterval is empty."); + // A local live range must be fully contained inside the block, meaning it is // defined and killed at instructions, not at block boundaries. It is not // live in or out of any block. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 1a04e1ca56a9..6477965bdc21 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -875,11 +875,11 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, // N.B: Multiple lists of successors and liveins are allowed and they're // merged into one. // Example: - // liveins: %edi - // liveins: %esi + // liveins: $edi + // liveins: $esi // // is equivalent to - // liveins: %edi, %esi + // liveins: $edi, $esi bool ExplicitSuccessors = false; while (true) { if (Token.is(MIToken::kw_successors)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index d0323eaf3d78..f144639770bc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -182,8 +182,7 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, LLVMContext &Context, std::function<void(Function &)> Callback) - : SM(), - Context(Context), + : Context(Context), In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc())) ->getBuffer(), nullptr, handleYAMLDiag, this), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp new file mode 100644 index 000000000000..a74c57690640 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -0,0 +1,862 @@ +//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the ML eviction advisor and reward injection pass +// +//===----------------------------------------------------------------------===// + +#include "RegAllocEvictionAdvisor.h" +#include "RegAllocGreedy.h" +#include "RegAllocScore.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/ModelUnderTrainingRunner.h" +#include "llvm/Analysis/NoInferenceModelRunner.h" +#include "llvm/Analysis/ReleaseModeModelRunner.h" +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/config.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" + +#include <array> +#include <memory> + +using namespace llvm; + +#define DEBUG_TYPE "ml-regalloc" + +// Generated header in release (AOT) mode +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) +#include "RegallocEvictModel.h" +#endif + +// Options that only make sense in development mode +#ifdef LLVM_HAVE_TF_API +static cl::opt<std::string> TrainingLog( + "regalloc-training-log", cl::Hidden, + cl::desc("Training log for the register allocator eviction model")); + +static cl::opt<std::string> ModelUnderTraining( + "regalloc-model", cl::Hidden, + cl::desc("The model being trained for register allocation eviction")); + +#endif // #ifdef LLVM_HAVE_TF_API + +/// The score injection pass. +/// This pass calculates the score for a function and inserts it in the log, but +/// this happens only in development mode. It's a no-op otherwise. +namespace llvm { +class RegAllocScoring : public MachineFunctionPass { +public: + static char ID; + + RegAllocScoring() : MachineFunctionPass(ID) { + initializeRegAllocScoringPass(*PassRegistry::getPassRegistry()); + } + + ~RegAllocScoring() override = default; + + StringRef getPassName() const override { + return "Register Allocation Pass Scoring"; + } + + /// RegAllocReward analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// Performs this pass + bool runOnMachineFunction(MachineFunction &) override; +}; + +char RegAllocScoring::ID = 0; +FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); } + +} // namespace llvm + +INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass", + "Register Allocation Scoring Pass", false, false) + +// =================================== +// Common ML Advisor declarations +// =================================== +namespace { +// This is the maximum number of interfererring ranges. That's the number of +// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize. +// For X86, that's 32. 
+// TODO: find a way to get this, statically, in a programmatic way. +static const int64_t MaxInterferences = 32; + +// Logically, we can think of the feature set given to the evaluator as a 2D +// matrix. The rows are the features (see next). The columns correspond to the +// interferences. We treat the candidate virt reg as an 'interference', too, as +// its feature set is the same as that of the interferring ranges. So we'll have +// MaxInterferences + 1 columns and by convention, we will use the last column +// for the virt reg seeking allocation. +static const int64_t CandidateVirtRegPos = MaxInterferences; +static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; + +// Most features are as described above, so we'll reuse this vector in defining +// them. +static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences}; + +// -------------- +// Features table +// -------------- +// For each interfering live range (incl. the candidate) we collect a number of +// features. However, because the features are of different types (and because +// of ML best practices), we organize the tensors per feature, not per +// candidate. Each such tensor has a scalar value corresponding to the +// interferring live range at that position, in the order in AllocationOrder. +// The last position corresponds to the virt reg seeking allocation. +// Exception to all that is the progression feature, which is just a scalar (see +// its documentation for details). +// Note on naming: the "_by_max" are normalized using the largest value of that +// tensor, as observed in the current decision making stage (i.e. for the +// current call to the advisor's tryFindEvictionCandidate) +// +// The feature list format: type, name, shape, documentation. +// Note: we can really just use int64 and float, hence the modeling of some +// bools as int64 values. +#define RA_EVICT_FEATURES_LIST(M) \ + M(int64_t, mask, PerLiveRangeShape, \ + "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \ + "it " \ + "can't be evicted)") \ + M(int64_t, is_free, PerLiveRangeShape, \ + "boolean values, 1 if this phys reg is actually free (no interferences)") \ + M(float, nr_urgent, PerLiveRangeShape, \ + "number of 'urgent' intervals, normalized. 
Urgent are those that are OK " \ + "to break cascades") \ + M(float, nr_broken_hints, PerLiveRangeShape, \ + "if this position were evicted, how many broken hints would there be") \ + M(int64_t, is_hint, PerLiveRangeShape, \ + "is this a preferred phys reg for the candidate") \ + M(int64_t, is_local, PerLiveRangeShape, \ + "is this live range local to a basic block") \ + M(float, nr_rematerializable, PerLiveRangeShape, \ + "nr rematerializable ranges") \ + M(float, nr_defs_and_uses, PerLiveRangeShape, \ + "bb freq - weighed nr defs and uses") \ + M(float, weighed_reads_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of reads, normalized") \ + M(float, weighed_writes_by_max, PerLiveRangeShape, \ + "bb feq - weighed nr of writes, normalized") \ + M(float, weighed_read_writes_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of uses that are both read and writes, normalized") \ + M(float, weighed_indvars_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of uses that are indvars, normalized") \ + M(float, hint_weights_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of uses that are hints, normalized") \ + M(float, start_bb_freq_by_max, PerLiveRangeShape, \ + "the freq in the start block, normalized") \ + M(float, end_bb_freq_by_max, PerLiveRangeShape, \ + "freq of end block, normalized") \ + M(float, hottest_bb_freq_by_max, PerLiveRangeShape, \ + "hottest BB freq, normalized") \ + M(float, liverange_size, PerLiveRangeShape, \ + "size (instr index diff) of the LR") \ + M(float, use_def_density, PerLiveRangeShape, \ + "the max weight, as computed by the manual heuristic") \ + M(int64_t, max_stage, PerLiveRangeShape, \ + "largest stage of an interval in this LR") \ + M(int64_t, min_stage, PerLiveRangeShape, \ + "lowest stage of an interval in this LR") \ + M(float, progress, {1}, "ratio of current queue size to initial size") + +// The model learns to pick one of the mask == 1 interferences. This is the name +// of the output tensor. +// The contract with the model is that the output will be guaranteed to be to a +// mask == 1 position. +// Using a macro here to avoid 'not used' warnings (and keep cond compilation to +// a minimum) +#define DecisionName "index_to_evict" + +// Named features index. +enum FeatureIDs { +#define _FEATURE_IDX(_, name, __, ___) name, + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) +#undef _FEATURE_IDX + FeatureCount +}; + +// The ML advisor will typically have a sparse input to the evaluator, because +// various phys regs won't be available. It's easier (maintenance-wise) to +// bulk-reset the state of the evaluator each time we are about to use it again. 
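The feature table here is an X-macro: a single list definition, RA_EVICT_FEATURES_LIST, is expanded with different per-entry macros to generate the FeatureIDs enum, the tensor specs, and the resetInputs helper that follows. A reduced, self-contained illustration of the pattern (plain C++; the two demo features are made up, not the real feature set):

    #include <cstdio>

    // One list drives everything else.
    #define DEMO_FEATURES_LIST(M)                                              \
      M(int64_t, mask, "eviction mask")                                        \
      M(float, progress, "queue progress")

    // Expansion 1: an index per feature.
    enum DemoFeatureIDs {
    #define DEMO_IDX(type, name, doc) name,
      DEMO_FEATURES_LIST(DEMO_IDX)
    #undef DEMO_IDX
      DemoFeatureCount
    };

    int main() {
    // Expansion 2: per-feature code, here just printing the table.
    #define DEMO_PRINT(type, name, doc)                                        \
      std::printf("%d: %s (%s)\n", static_cast<int>(name), #name, doc);
      DEMO_FEATURES_LIST(DEMO_PRINT)
    #undef DEMO_PRINT
      return 0;
    }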
+template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) { + size_t Ret = sizeof(T); + for (const auto V : Shape) + Ret *= V; + return Ret; +} + +void resetInputs(MLModelRunner &Runner) { +#define _RESET(TYPE, NAME, SHAPE, __) \ + std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ + getTotalSize<TYPE>(SHAPE)); + RA_EVICT_FEATURES_LIST(_RESET) +#undef _RESET +} + +using CandidateRegList = + std::array<std::pair<MCRegister, bool>, NumberOfInterferences>; +using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>; + +/// The ML evictor (commonalities between release and development mode) +class MLEvictAdvisor : public RegAllocEvictionAdvisor { +public: + MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, + MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI, + const MachineLoopInfo &Loops); + +protected: + const RegAllocEvictionAdvisor &getDefaultAdvisor() const { + return static_cast<const RegAllocEvictionAdvisor &>(DefaultAdvisor); + } + + // The assumption is that if the Runner could not be constructed, we emit-ed + // error, and we shouldn't be asking for it here. + const MLModelRunner &getRunner() const { return *Runner; } + + /// This just calls Evaluate on the Runner, but in the development mode case, + /// if we're just capturing the log of the default advisor, it needs to call + /// the latter instead, so we need to pass all the necessary parameters for + /// it. In the development case, it will also log. + virtual int64_t tryFindEvictionCandidatePosition( + LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const; + + /// Load the features of the given VirtReg (allocated or not) at column Pos, + /// but if that can't be evicted, return false instead. + bool + loadInterferenceFeatures(LiveInterval &VirtReg, MCRegister PhysReg, + bool IsHint, const SmallVirtRegSet &FixedRegisters, + std::array<float, FeatureIDs::FeatureCount> &Largest, + size_t Pos) const; + +private: + static float getInitialQueueSize(const MachineFunction &MF); + + MCRegister tryFindEvictionCandidate( + LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) const override; + + void extractFeatures(const SmallVectorImpl<LiveInterval *> &Intervals, + std::array<float, FeatureIDs::FeatureCount> &Largest, + size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, + float NrUrgent) const; + + // Point-in-time: we didn't learn this, so we always delegate to the default. + bool canEvictHintInterference( + LiveInterval &VirtReg, MCRegister PhysReg, + const SmallVirtRegSet &FixedRegisters) const override { + return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg, + FixedRegisters); + } + + // Hold on to a default advisor for: + // 1) the implementation of canEvictHintInterference, because we didn't learn + // that nuance yet; + // 2) for bootstrapping (logging) in the development mode case. + const DefaultEvictionAdvisor DefaultAdvisor; + MLModelRunner *const Runner; + const MachineBlockFrequencyInfo &MBFI; + const MachineLoopInfo &Loops; + + // Indices of those features we don't want to normalize. + // This could be static and shared, but its initialization is non-trivial. 
+ std::bitset<FeatureIDs::FeatureCount> DoNotNormalize; + const float InitialQSize; +}; + +// =================================== +// Release (AOT) - specifics +// =================================== +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) +const std::array<std::string, FeatureIDs::FeatureCount> FeatureNames{ +#define _GETNAME(_, NAME, __, ___) #NAME, + RA_EVICT_FEATURES_LIST(_GETNAME) +#undef _GETNAME +}; +class ReleaseModeEvictionAdvisorAnalysis final + : public RegAllocEvictionAdvisorAnalysis { +public: + ReleaseModeEvictionAdvisorAnalysis() + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {} + // support for isa<> and dyn_cast. + static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Release; + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineLoopInfo>(); + RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU); + } + + std::unique_ptr<RegAllocEvictionAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + if (!Runner) + Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>( + MF.getFunction().getContext(), FeatureNames, DecisionName); + return std::make_unique<MLEvictAdvisor>( + MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(), + getAnalysis<MachineLoopInfo>()); + } + std::unique_ptr<ReleaseModeModelRunner<RegallocEvictModel>> Runner; +}; +#endif + +// =================================== +// Development mode-specifics +// =================================== +// +// Features we log +#ifdef LLVM_HAVE_TF_API +#define _DECL_FEATURES(type, name, shape, _) \ + TensorSpec::createSpec<type>(#name, shape), + +static const std::vector<TensorSpec> InputFeatures{ + {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}, +}; +#undef _DECL_FEATURES +static const TensorSpec Output = + TensorSpec::createSpec<int64_t>(DecisionName, {1}); +static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1}); + +// Features we bind on the model. The tensor names have a prefix, and we also +// need to include some tensors that are expected to be present by the training +// algo. +// TODO: can we just get rid of these? +#define _DECL_TRAIN_FEATURES(type, name, shape, _) \ + TensorSpec::createSpec<type>(std::string("action_") + #name, shape), + +static const std::vector<TensorSpec> TrainingInputFeatures{ + {RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}}; +#undef _DECL_TRAIN_FEATURES + +class DevelopmentModeEvictAdvisor : public MLEvictAdvisor { +public: + DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, + MLModelRunner *Runner, + const MachineBlockFrequencyInfo &MBFI, + const MachineLoopInfo &Loops, Logger *Log) + : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {} + +private: + int64_t tryFindEvictionCandidatePosition( + LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit, + uint8_t CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) const override; + + Logger *const Log; +}; + +class DevelopmentModeEvictionAdvisorAnalysis final + : public RegAllocEvictionAdvisorAnalysis { +public: + DevelopmentModeEvictionAdvisorAnalysis() + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {} + // support for isa<> and dyn_cast. 
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Development; + } + + /// get the logger for the given function, or nullptr if we didn't collect + /// one. This is used to inject the score by the RegAllocScoring pass. + Logger *getLogger(const MachineFunction &MF) const { + auto I = LogMap.find(MF.getName()); + if (I == LogMap.end()) + return nullptr; + return I->second.get(); + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineLoopInfo>(); + RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU); + } + + // Save all the logs (when requested). + bool doFinalization(Module &M) override { + if (TrainingLog.empty()) + return false; + std::error_code EC; + auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC); + if (EC) { + M.getContext().emitError(EC.message() + ":" + TrainingLog); + return false; + } + Logger::flushLogs(*OS, LogMap); + return false; + } + + std::unique_ptr<RegAllocEvictionAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + LLVMContext &Ctx = MF.getFunction().getContext(); + if (ModelUnderTraining.empty() && TrainingLog.empty()) { + Ctx.emitError("Regalloc development mode should be requested with at " + "least logging enabled and/or a training model"); + return nullptr; + } + if (!Runner) { + if (ModelUnderTraining.empty()) + Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures); + else + Runner = ModelUnderTrainingRunner::createAndEnsureValid( + Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures); + if (!Runner) { + Ctx.emitError("Regalloc: could not set up the model runner"); + return nullptr; + } + } + + Logger *Log = nullptr; + if (!TrainingLog.empty()) { + std::vector<LoggedFeatureSpec> LFS; + for (const auto &FS : InputFeatures) + LFS.push_back({FS, None}); + if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get())) + if (MUTR->outputLoggedFeatureSpecs().size() > 1) + append_range(LFS, drop_begin(MUTR->outputLoggedFeatureSpecs())); + // We always log the output; in particular, if we're not evaluating, we + // don't have an output spec json file. That's why we handle the + // 'normal' output separately. 
+ LFS.push_back({Output, None}); + auto I = LogMap.insert(std::make_pair( + MF.getFunction().getName(), + std::make_unique<Logger>(LFS, Reward, /*IncludeReward*/ true))); + assert(I.second); + Log = I.first->second.get(); + } + return std::make_unique<DevelopmentModeEvictAdvisor>( + MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(), + getAnalysis<MachineLoopInfo>(), Log); + } + + std::unique_ptr<MLModelRunner> Runner; + StringMap<std::unique_ptr<Logger>> LogMap; +}; +#endif //#ifdef LLVM_HAVE_TF_API +} // namespace + +float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { + auto &MRI = MF.getRegInfo(); + float Ret = 0.0; + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); + if (MRI.reg_nodbg_empty(Reg)) + continue; + ++Ret; + } + return Ret; +} + +MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, + MLModelRunner *Runner, + const MachineBlockFrequencyInfo &MBFI, + const MachineLoopInfo &Loops) + : RegAllocEvictionAdvisor(MF, RA), DefaultAdvisor(MF, RA), + Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops), + InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) { + assert(this->Runner); + DoNotNormalize.set(FeatureIDs::mask); + DoNotNormalize.set(FeatureIDs::is_free); + DoNotNormalize.set(FeatureIDs::is_hint); + DoNotNormalize.set(FeatureIDs::is_local); + DoNotNormalize.set(FeatureIDs::min_stage); + DoNotNormalize.set(FeatureIDs::max_stage); + DoNotNormalize.set(FeatureIDs::progress); +} + +int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition( + LiveInterval &, const AllocationOrder &, unsigned, uint8_t, + const SmallVirtRegSet &) const { + int64_t Ret = Runner->evaluate<int64_t>(); + assert(Ret >= 0); + assert(Ret <= CandidateVirtRegPos); + return Ret; +} + +bool MLEvictAdvisor::loadInterferenceFeatures( + LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest, + size_t Pos) const { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { + // leave unavailable + return false; + } + + const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + int64_t LocalIntfs = 0; + float NrUrgent = 0.0f; + + // The cascade tracking is the same as in the default advisor. + unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg()); + + SmallVector<LiveInterval *, MaxInterferences> InterferingIntervals; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // Unlike the default heuristic, we make no assumptions about what it means + // for the query to return more than 10 interfering ranges. + const auto &IFIntervals = Q.interferingVRegs(); + if (IFIntervals.empty() && InterferingIntervals.empty()) + continue; + InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end()); + for (LiveInterval *Intf : reverse(IFIntervals)) { + assert(Register::isVirtualRegister(Intf->reg()) && + "Only expecting virtual register interference from query"); + // This is the same set of legality checks as in the default case: don't + // try to evict fixed regs or 'done' ones. Also don't break cascades, + // except in the urgent case, with the same nuances used in the default + // heuristic. + // We could try sharing this between the advisors, but it may end up + // more complex than it is right now.
+ if (FixedRegisters.count(Intf->reg())) + return false; + if (RA.getExtraInfo().getStage(*Intf) == RS_Done) + return false; + bool Urgent = + !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) < + RegClassInfo.getNumAllocatableRegs( + MRI->getRegClass(Intf->reg()))); + // Only evict older cascades or live ranges without a cascade. + unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); + if (Cascade <= IntfCascade) { + if (!Urgent) + return false; + ++NrUrgent; + } + + LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) && + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))); + } + } + // OK, so if we made it this far, this LR is an eviction candidate, load its + // features. + extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs, + NrUrgent); + return true; +} + +MCRegister MLEvictAdvisor::tryFindEvictionCandidate( + LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { + auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit); + if (!MaybeOrderLimit) + return MCRegister::NoRegister; + unsigned OrderLimit = *MaybeOrderLimit; + + // The heuristic sets initial costs such that, if CostPerUseLimit is + // max<uint8_t>, then any of the costs of the legally-evictable intervals + // would be lower. When that happens, one of those will be selected. + // Therefore, we allow the candidate to be selected, unless the candidate is + // unspillable, in which case it would be incorrect to not find a register for + // it. + const bool MustFindEviction = + (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u)); + // Number of available candidates - if 0, no need to continue. + size_t Available = 0; + // Make sure we don't have leftover partial state from an attempt where we had + // no available candidates and bailed out early. + resetInputs(*Runner); + + // Track the index->register mapping because AllocationOrder doesn't do that + // and we'd have to scan it. + // Also track their mask, to write asserts/debug. + CandidateRegList Regs; + Regs.fill({0, false}); + + // Track the largest value of features seen during this eviction session. We + // only normalize (some of) the float features, but it's just simpler to + // dimension 'Largest' to all the features, especially since we have the + // 'DoNotNormalize' list. + FeaturesListNormalizer Largest; + Largest.fill(0.0); + + // Same overall idea as in the default eviction policy - we visit the values of + // AllocationOrder one at a time. If it's not legally available, we mask off + // the corresponding feature column (== do nothing, because we already reset all + // the features to 0). + // Use Pos to capture the column we load features at - in AllocationOrder + // order. + size_t Pos = 0; + for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; + ++I, ++Pos) { + MCRegister PhysReg = *I; + Regs[Pos] = std::make_pair(PhysReg, true); + assert(PhysReg); + if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) { + Regs[Pos].second = false; + continue; + } + if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, + Largest, Pos)) { + ++Available; + Regs[Pos].second = true; + } + } + if (Available == 0) { + // Nothing to decide, nothing to learn. + assert(!MustFindEviction); + return MCRegister::NoRegister; + } + // If we must find eviction, the candidate should be masked out of the + // decision-making process.
+ Regs[CandidateVirtRegPos].second = !MustFindEviction; + if (!MustFindEviction) + extractFeatures(SmallVector<LiveInterval *, 1>(1, &VirtReg), Largest, + CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, + /*NrUrgent*/ 0.0); + assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " + "nothing to allocate initially."); + // Normalize the features. + for (auto &V : Largest) + V = V ? V : 1.0; + for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount; + ++FeatureIndex) { + if (DoNotNormalize.test(FeatureIndex)) + continue; + for (size_t Pos = 0; Pos < NumberOfInterferences; ++Pos) { + Runner->getTensor<float>(FeatureIndex)[Pos] /= Largest[FeatureIndex]; + } + } + *Runner->getTensor<float>(FeatureIDs::progress) = + static_cast<float>(RA.getQueueSize()) / InitialQSize; + + // Get a decision. + size_t CandidatePos = tryFindEvictionCandidatePosition( + VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters); + // The contract with the ML side is that CandidatePos is mask == 1 (i.e. + // Regs[CandidatePos].second) + assert(Regs[CandidatePos].second); + if (CandidatePos == CandidateVirtRegPos) { + assert(!MustFindEviction); + return MCRegister::NoRegister; + } + return Regs[CandidatePos].first; +} + +// Overall, this currently mimics what we do for weight calculation, but instead +// of accumulating the various features, we keep them separate. +void MLEvictAdvisor::extractFeatures( + const SmallVectorImpl<LiveInterval *> &Intervals, + std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos, + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { + int64_t NrDefsAndUses = 0; + int64_t NrBrokenHints = 0; + float R = 0; + float W = 0; + float RW = 0; + float IndVarUpdates = 0; + float HintWeights = 0.0; + float StartBBFreq = 0.0; + float EndBBFreq = 0.0; + float HottestBlockFreq = 0.0; + int32_t NrRematerializable = 0; + float TotalWeight = 0.0; + + SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex(); + SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex(); + int64_t MaxStage = 0; + int64_t MinStage = + Intervals.empty() ? 0 : std::numeric_limits<int64_t>::max(); + + for (const auto *L : Intervals) { + const LiveInterval &LI = *L; + MaxStage = std::max<int64_t>( + MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI))); + MinStage = std::min<int64_t>( + MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI))); + + TotalWeight = std::max(TotalWeight, LI.weight()); + + if (LI.beginIndex() < StartSI) + StartSI = LI.beginIndex(); + + if (LI.endIndex() > EndSI) + EndSI = LI.endIndex(); + + SmallPtrSet<MachineInstr *, 8> Visited; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + NrBrokenHints += VRM->hasPreferredPhys(LI.reg()); + + for (MachineRegisterInfo::reg_instr_nodbg_iterator + I = MRI->reg_instr_nodbg_begin(LI.reg()), + E = MRI->reg_instr_nodbg_end(); + I != E;) { + MachineInstr *MI = &*(I++); + + ++NrDefsAndUses; + if (!Visited.insert(MI).second) + continue; + + if (MI->isIdentityCopy() || MI->isImplicitDef()) + continue; + + bool Reads, Writes; + std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg()); + + float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent()); + if (Freq > HottestBlockFreq) + HottestBlockFreq = Freq; + R += (Reads && !Writes) * Freq; + W += (!Reads && Writes) * Freq; + RW += (Reads && Writes) * Freq; + + auto *MBB = MI->getParent(); + auto *Loop = Loops.getLoopFor(MBB); + bool IsExiting = Loop ?
Loop->isLoopExiting(MBB) : false; + + if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB)) + IndVarUpdates += Freq; + + if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI)) + HintWeights += Freq; + } + NrRematerializable += VirtRegAuxInfo::isRematerializable( + LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo()); + } + size_t Size = 0; + if (!Intervals.empty()) { + StartBBFreq = + MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(StartSI)); + if (EndSI >= LIS->getSlotIndexes()->getLastIndex()) + EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex(); + EndBBFreq = + MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(EndSI)); + Size = StartSI.distance(EndSI); + } + // Set the features at the column 'Pos'. +#define SET(ID, TYPE, VAL) \ + do { \ + Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL); \ + if (!DoNotNormalize.test(FeatureIDs::ID)) \ + Largest[FeatureIDs::ID] = \ + std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL)); \ + } while (false) + SET(mask, int64_t, 1); + SET(is_free, int64_t, Intervals.empty()); + SET(nr_urgent, float, NrUrgent); + SET(nr_broken_hints, float, NrBrokenHints); + SET(is_hint, int64_t, IsHint); + SET(is_local, int64_t, LocalIntfsCount); + SET(nr_rematerializable, float, NrRematerializable); + SET(nr_defs_and_uses, float, NrDefsAndUses); + SET(weighed_reads_by_max, float, R); + SET(weighed_writes_by_max, float, W); + SET(weighed_read_writes_by_max, float, RW); + SET(weighed_indvars_by_max, float, IndVarUpdates); + SET(hint_weights_by_max, float, HintWeights); + SET(start_bb_freq_by_max, float, StartBBFreq); + SET(end_bb_freq_by_max, float, EndBBFreq); + SET(hottest_bb_freq_by_max, float, HottestBlockFreq); + SET(liverange_size, float, Size); + SET(use_def_density, float, TotalWeight); + SET(max_stage, int64_t, MaxStage); + SET(min_stage, int64_t, MinStage); +#undef SET +} + +// Development mode-specific implementations +#ifdef LLVM_HAVE_TF_API +RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { + return new DevelopmentModeEvictionAdvisorAnalysis(); +} + +int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition( + LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { + int64_t Ret = 0; + if (isa<ModelUnderTrainingRunner>(getRunner())) { + Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition( + VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters); + } else { + MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate( + VirtReg, Order, CostPerUseLimit, FixedRegisters); + // Find the index of the selected PhysReg. 
We need it for logging, otherwise + // this is wasted cycles (but so would be starting development mode without a + // model or logging) + if (!PhysReg) + Ret = CandidateVirtRegPos; + else + for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); + I != E; ++I, ++Ret) + if (*I == PhysReg) + break; + } + if (TrainingLog.empty()) + return Ret; + size_t CurrentFeature = 0; + for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) { + Log->logSpecifiedTensorValue( + CurrentFeature, reinterpret_cast<const char *>( + getRunner().getTensorUntyped(CurrentFeature))); + } + if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner())) + for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size(); + ++I, ++CurrentFeature) + Log->logSpecifiedTensorValue( + CurrentFeature, + reinterpret_cast<const char *>( + MUTR->lastEvaluationResult()->getUntypedTensorValue(I))); + // The output is right after the features and the extra outputs. + Log->logInt64Value(CurrentFeature, &Ret); + return Ret; +} + +bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { + if (auto *DevModeAnalysis = dyn_cast<DevelopmentModeEvictionAdvisorAnalysis>( + &getAnalysis<RegAllocEvictionAdvisorAnalysis>())) + if (auto *Log = DevModeAnalysis->getLogger(MF)) + Log->logFloatFinalReward(static_cast<float>( + calculateRegAllocScore( + MF, getAnalysis<MachineBlockFrequencyInfo>(), + getAnalysis<AAResultsWrapperPass>().getAAResults()) + .getScore())); + + return false; +} +#endif // #ifdef LLVM_HAVE_TF_API + +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) +RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() { + return new ReleaseModeEvictionAdvisorAnalysis(); +} +#endif + +// In all cases except development mode, we don't need scoring. +#if !defined(LLVM_HAVE_TF_API) +bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; } +#endif diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 692587cd58fa..c93ffaabf74c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -96,6 +96,12 @@ static cl::opt<unsigned> AlignAllNonFallThruBlocks( "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden); +static cl::opt<unsigned> MaxBytesForAlignmentOverride( + "max-bytes-for-alignment", + cl::desc("Forces the maximum bytes allowed to be emitted when padding for " + "alignment"), + cl::init(0), cl::Hidden); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> ExitBlockBias( "block-placement-exit-block-bias", @@ -2929,10 +2935,21 @@ void MachineBlockPlacement::alignBlocks() { MachineBasicBlock *LayoutPred = &*std::prev(MachineFunction::iterator(ChainBB)); + auto DetermineMaxAlignmentPadding = [&]() { + // Set the maximum bytes allowed to be emitted for alignment. + unsigned MaxBytes; + if (MaxBytesForAlignmentOverride.getNumOccurrences() > 0) + MaxBytes = MaxBytesForAlignmentOverride; + else + MaxBytes = TLI->getMaxPermittedBytesForAlignment(ChainBB); + ChainBB->setMaxBytesForAlignment(MaxBytes); + }; + // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above.
if (!LayoutPred->isSuccessor(ChainBB)) { ChainBB->setAlignment(Align); + DetermineMaxAlignmentPadding(); continue; } @@ -2943,8 +2960,10 @@ void MachineBlockPlacement::alignBlocks() { BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, ChainBB); BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; - if (LayoutEdgeFreq <= (Freq * ColdProb)) + if (LayoutEdgeFreq <= (Freq * ColdProb)) { ChainBB->setAlignment(Align); + DetermineMaxAlignmentPadding(); + } } } @@ -3418,17 +3437,30 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { ComputedEdges.clear(); ChainAllocator.DestroyAll(); + bool HasMaxBytesOverride = + MaxBytesForAlignmentOverride.getNumOccurrences() > 0; + if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. - for (MachineBasicBlock &MBB : MF) - MBB.setAlignment(Align(1ULL << AlignAllBlock)); + for (MachineBasicBlock &MBB : MF) { + if (HasMaxBytesOverride) + MBB.setAlignment(Align(1ULL << AlignAllBlock), + MaxBytesForAlignmentOverride); + else + MBB.setAlignment(Align(1ULL << AlignAllBlock)); + } else if (AlignAllNonFallThruBlocks) { // Align all of the blocks that have no fall-through predecessors to a // specific alignment. for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) { auto LayoutPred = std::prev(MBI); - if (!LayoutPred->isSuccessor(&*MBI)) - MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); + if (!LayoutPred->isSuccessor(&*MBI)) { + if (HasMaxBytesOverride) + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks), + MaxBytesForAlignmentOverride); + else + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); + } } } if (ViewBlockLayoutWithBFI != GVDT_None && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index 6ddb1758719b..a39dc79baaa8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -29,9 +29,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier", "Machine Dominance Frontier Construction", true, true) -MachineDominanceFrontier::MachineDominanceFrontier() - : MachineFunctionPass(ID), - Base() { +MachineDominanceFrontier::MachineDominanceFrontier() : MachineFunctionPass(ID) { initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 81ed3d0e93ff..fd5ea5cad072 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -76,6 +76,8 @@ #include <utility> #include <vector> +#include "LiveDebugValues/LiveDebugValues.h" + using namespace llvm; #define DEBUG_TYPE "codegen" @@ -1238,7 +1240,7 @@ bool MachineFunction::useDebugInstrRef() const { if (F.hasFnAttribute(Attribute::OptimizeNone)) return false; - if (getTarget().Options.ValueTrackingVariableLocations) + if (llvm::debuginfoShouldUseDebugInstrRef(getTarget().getTargetTriple())) return true; return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp index 6ca97031b92a..759cff179790 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -144,6 +144,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, SmallSet<Register, 8> UndefUseSet; SmallVector<MachineOperand*, 4> Defs; for (auto MII = FirstMI; MII != LastMI; ++MII) { + // Debug instructions have no effects to track. + if (MII->isDebugInstr()) + continue; + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { MachineOperand &MO = MII->getOperand(i); if (!MO.isReg()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 59fc23983d3d..5347a7b0d890 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -22,8 +22,7 @@ using namespace llvm; DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( - StringRef MKey, const MachineInstr &MI) - : Argument() { + StringRef MKey, const MachineInstr &MI) { Key = std::string(MKey); raw_string_ostream OS(Val); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 54c478645dcf..0dbbc218e946 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -796,9 +796,14 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (Reg == 0) continue; - // Don't handle physical register. - if (Register::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse() && + (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) + continue; + + // Don't handle non-constant and non-ignorable physical register. return false; + } // Users for the defs are all dominated by SuccToSinkTo. if (MO.isDef()) { @@ -898,7 +903,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg)) + if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO)) return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index aaa6403cc978..f91a9d2c3a32 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -1704,7 +1704,7 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // Peel out the prologs. LS.reset(); for (int I = 0; I < Schedule.getNumStages() - 1; ++I) { - LS[I] = 1; + LS[I] = true; Prologs.push_back(peelKernel(LPD_Front)); LiveStages[Prologs.back()] = LS; AvailableStages[Prologs.back()] = LS; @@ -1752,7 +1752,7 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // Move stage one block at a time so that Phi nodes are updated correctly. 
for (size_t K = Iteration; K > I; K--) moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage); - LS[Stage] = 1; + LS[Stage] = true; } LiveStages[Epilogs[I]] = LS; AvailableStages[Epilogs[I]] = AS; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index 9ed3471c0fc9..db5217469fba 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/NonRelocatableStringpool.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index e3eb3f825851..74b903f99284 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -97,7 +97,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn, objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB); (void)Kind; assert((Kind == objcarc::ARCInstKind::RetainRV || - Kind == objcarc::ARCInstKind::ClaimRV) && + Kind == objcarc::ARCInstKind::UnsafeClaimRV) && "use expected to be the argument of operand bundle " "\"clang.arc.attachedcall\""); U.set(FCache.getCallee()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index 9f1012c95964..87df7bb4a689 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "RegAllocEvictionAdvisor.h" +#include "RegAllocGreedy.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -42,6 +43,9 @@ static cl::opt<bool> EnableLocalReassignment( cl::init(false)); #define DEBUG_TYPE "regalloc" +#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL +#define LLVM_HAVE_TF_AOT +#endif char RegAllocEvictionAdvisorAnalysis::ID = 0; INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict", @@ -62,12 +66,8 @@ public: private: std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, - LiveIntervals *LIS, VirtRegMap *VRM, - const RegisterClassInfo &RegClassInfo, - ExtraRegInfo *ExtraInfo) override { - return std::make_unique<DefaultEvictionAdvisor>(MF, Matrix, LIS, VRM, - RegClassInfo, ExtraInfo); + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + return std::make_unique<DefaultEvictionAdvisor>(MF, RA); } bool doInitialization(Module &M) override { if (NotAsRequested) @@ -86,10 +86,14 @@ template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() { Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false); break; case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development: - // TODO(mtrofin): add implementation +#if defined(LLVM_HAVE_TF_API) + Ret = createDevelopmentModeAdvisor(); +#endif break; case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release: - // TODO(mtrofin): add implementation +#if defined(LLVM_HAVE_TF_AOT) + Ret = createReleaseModeAdvisor(); +#endif break; } if (Ret) @@ -109,13 +113,12 @@ StringRef 
RegAllocEvictionAdvisorAnalysis::getPassName() const { llvm_unreachable("Unknown advisor kind"); } -RegAllocEvictionAdvisor::RegAllocEvictionAdvisor( - const MachineFunction &MF, LiveRegMatrix *Matrix, LiveIntervals *LIS, - VirtRegMap *VRM, const RegisterClassInfo &RegClassInfo, - ExtraRegInfo *ExtraInfo) - : MF(MF), Matrix(Matrix), LIS(LIS), VRM(VRM), MRI(&VRM->getRegInfo()), - TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RegClassInfo), - RegCosts(TRI->getRegisterCosts(MF)), ExtraInfo(ExtraInfo), +RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF, + const RAGreedy &RA) + : MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()), + LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()), + MRI(&VRM->getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()), + RegClassInfo(RA.getRegClassInfo()), RegCosts(TRI->getRegisterCosts(MF)), EnableLocalReassign(EnableLocalReassignment || MF.getSubtarget().enableRALocalReassignment( MF.getTarget().getOptLevel())) {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index debb75ed5020..33e03aed81a7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -87,87 +87,9 @@ struct EvictionCost { } }; -/// Track allocation stage and eviction loop prevention during allocation. -// TODO(mtrofin): Consider exposing RAGreedy in a header instead, and folding -// this back into it. -class ExtraRegInfo final { - // RegInfo - Keep additional information about each live range. - struct RegInfo { - LiveRangeStage Stage = RS_New; - - // Cascade - Eviction loop prevention. See - // canEvictInterferenceBasedOnCost(). - unsigned Cascade = 0; - - RegInfo() = default; - }; - - IndexedMap<RegInfo, VirtReg2IndexFunctor> Info; - unsigned NextCascade = 1; - -public: - ExtraRegInfo() = default; - ExtraRegInfo(const ExtraRegInfo &) = delete; - - LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; } - - LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return getStage(VirtReg.reg()); - } - - void setStage(Register Reg, LiveRangeStage Stage) { - Info.grow(Reg.id()); - Info[Reg].Stage = Stage; - } - - void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { - setStage(VirtReg.reg(), Stage); - } - - /// Return the current stage of the register, if present, otherwise initialize - /// it and return that. 
- LiveRangeStage getOrInitStage(Register Reg) { - Info.grow(Reg.id()); - return getStage(Reg); - } - - unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; } - - void setCascade(Register Reg, unsigned Cascade) { - Info.grow(Reg.id()); - Info[Reg].Cascade = Cascade; - } - - unsigned getOrAssignNewCascade(Register Reg) { - unsigned Cascade = getCascade(Reg); - if (!Cascade) { - Cascade = NextCascade++; - setCascade(Reg, Cascade); - } - return Cascade; - } - - unsigned getCascadeOrCurrentNext(Register Reg) const { - unsigned Cascade = getCascade(Reg); - if (!Cascade) - Cascade = NextCascade; - return Cascade; - } - - template <typename Iterator> - void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { - for (; Begin != End; ++Begin) { - Register Reg = *Begin; - Info.grow(Reg.id()); - if (Info[Reg].Stage == RS_New) - Info[Reg].Stage = NewStage; - } - } - void LRE_DidCloneVirtReg(Register New, Register Old); -}; - /// Interface to the eviction advisor, which is responsible for making a /// decision as to which live ranges should be evicted (if any). +class RAGreedy; class RegAllocEvictionAdvisor { public: RegAllocEvictionAdvisor(const RegAllocEvictionAdvisor &) = delete; @@ -193,14 +115,23 @@ public: bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; protected: - RegAllocEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, - LiveIntervals *LIS, VirtRegMap *VRM, - const RegisterClassInfo &RegClassInfo, - ExtraRegInfo *ExtraInfo); + RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA); Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; + // Get the upper limit of elements in the given Order we need to analyze. + // TODO: this is a heuristic; we could consider learning it. + Optional<unsigned> getOrderLimit(const LiveInterval &VirtReg, + const AllocationOrder &Order, + unsigned CostPerUseLimit) const; + + // Determine if it's worth trying to allocate this reg, given the + // CostPerUseLimit. + // TODO: this is a heuristic component we could consider learning, too. + bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const; + const MachineFunction &MF; + const RAGreedy &RA; LiveRegMatrix *const Matrix; LiveIntervals *const LIS; VirtRegMap *const VRM; @@ -208,7 +139,6 @@ protected: const TargetRegisterInfo *const TRI; const RegisterClassInfo &RegClassInfo; const ArrayRef<uint8_t> RegCosts; - ExtraRegInfo *const ExtraInfo; /// Run or not the local reassignment heuristic. This information is /// obtained from the TargetSubtargetInfo. @@ -243,19 +173,17 @@ public: /// Get an advisor for the given context (i.e. machine function, etc) virtual std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, - LiveIntervals *LIS, VirtRegMap *VRM, - const RegisterClassInfo &RegClassInfo, - ExtraRegInfo *ExtraInfo) = 0; + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } -private: +protected: // This analysis preserves everything, and subclasses may have additional // requirements. void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } +private: StringRef getPassName() const override; const AdvisorMode Mode; }; @@ -264,25 +192,16 @@ private: /// an instance of the eviction advisor. template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>(); -// TODO(mtrofin): implement these.
-#ifdef LLVM_HAVE_TF_AOT RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor(); -#endif -#ifdef LLVM_HAVE_TF_API RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor(); -#endif // TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation // out of RegAllocGreedy.cpp class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor { public: - DefaultEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, - LiveIntervals *LIS, VirtRegMap *VRM, - const RegisterClassInfo &RegClassInfo, - ExtraRegInfo *ExtraInfo) - : RegAllocEvictionAdvisor(MF, Matrix, LIS, VRM, RegClassInfo, ExtraInfo) { - } + DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA) + : RegAllocEvictionAdvisor(MF, RA) {} private: MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index ce3cf31dbd6b..6ea6dbcbbb74 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "RegAllocGreedy.h" #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" @@ -135,362 +136,6 @@ static cl::opt<bool> ConsiderLocalIntervalCost( static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); -namespace { - -class RAGreedy : public MachineFunctionPass, - public RegAllocBase, - private LiveRangeEdit::Delegate { - // Convenient shortcuts. - using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; - using SmallLISet = SmallPtrSet<LiveInterval *, 4>; - - // context - MachineFunction *MF; - - // Shortcuts to some useful interface. - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RCI; - - // analyses - SlotIndexes *Indexes; - MachineBlockFrequencyInfo *MBFI; - MachineDominatorTree *DomTree; - MachineLoopInfo *Loops; - MachineOptimizationRemarkEmitter *ORE; - EdgeBundles *Bundles; - SpillPlacement *SpillPlacer; - LiveDebugVariables *DebugVars; - AliasAnalysis *AA; - - // state - std::unique_ptr<Spiller> SpillerInstance; - PQueue Queue; - std::unique_ptr<VirtRegAuxInfo> VRAI; - Optional<ExtraRegInfo> ExtraInfo; - std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor; - - // Enum CutOffStage to keep a track whether the register allocation failed - // because of the cutoffs encountered in last chance recoloring. - // Note: This is used as bitmask. New value should be next power of 2. - enum CutOffStage { - // No cutoffs encountered - CO_None = 0, - - // lcr-max-depth cutoff encountered - CO_Depth = 1, - - // lcr-max-interf cutoff encountered - CO_Interf = 2 - }; - - uint8_t CutOffInfo; - -#ifndef NDEBUG - static const char *const StageName[]; -#endif - - /// EvictionTrack - Keeps track of past evictions in order to optimize region - /// split decision. - class EvictionTrack { - - public: - using EvictorInfo = - std::pair<Register /* evictor */, MCRegister /* physreg */>; - using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>; - - private: - /// Each Vreg that has been evicted in the last stage of selectOrSplit will - /// be mapped to the evictor Vreg and the PhysReg it was evicted from. - EvicteeInfo Evictees; - - public: - /// Clear all eviction information. 
- void clear() { Evictees.clear(); } - - /// Clear eviction information for the given evictee Vreg. - /// E.g. when Vreg get's a new allocation, the old eviction info is no - /// longer relevant. - /// \param Evictee The evictee Vreg for whom we want to clear collected - /// eviction info. - void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } - - /// Track new eviction. - /// The Evictor vreg has evicted the Evictee vreg from Physreg. - /// \param PhysReg The physical register Evictee was evicted from. - /// \param Evictor The evictor Vreg that evicted Evictee. - /// \param Evictee The evictee Vreg. - void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { - Evictees[Evictee].first = Evictor; - Evictees[Evictee].second = PhysReg; - } - - /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. - /// \param Evictee The evictee vreg. - /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if - /// nobody has evicted Evictee from PhysReg. - EvictorInfo getEvictor(Register Evictee) { - if (Evictees.count(Evictee)) { - return Evictees[Evictee]; - } - - return EvictorInfo(0, 0); - } - }; - - // Keeps track of past evictions in order to optimize region split decision. - EvictionTrack LastEvicted; - - // splitting state. - std::unique_ptr<SplitAnalysis> SA; - std::unique_ptr<SplitEditor> SE; - - /// Cached per-block interference maps - InterferenceCache IntfCache; - - /// All basic blocks where the current register has uses. - SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; - - /// Global live range splitting candidate info. - struct GlobalSplitCandidate { - // Register intended for assignment, or 0. - MCRegister PhysReg; - - // SplitKit interval index for this candidate. - unsigned IntvIdx; - - // Interference for PhysReg. - InterferenceCache::Cursor Intf; - - // Bundles where this candidate should be live. - BitVector LiveBundles; - SmallVector<unsigned, 8> ActiveBlocks; - - void reset(InterferenceCache &Cache, MCRegister Reg) { - PhysReg = Reg; - IntvIdx = 0; - Intf.setPhysReg(Cache, Reg); - LiveBundles.clear(); - ActiveBlocks.clear(); - } - - // Set B[I] = C for every live bundle where B[I] was NoCand. - unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { - unsigned Count = 0; - for (unsigned I : LiveBundles.set_bits()) - if (B[I] == NoCand) { - B[I] = C; - Count++; - } - return Count; - } - }; - - /// Candidate info for each PhysReg in AllocationOrder. - /// This vector never shrinks, but grows to the size of the largest register - /// class. - SmallVector<GlobalSplitCandidate, 32> GlobalCand; - - enum : unsigned { NoCand = ~0u }; - - /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to - /// NoCand which indicates the stack interval. - SmallVector<unsigned, 32> BundleCand; - - /// Callee-save register cost, calculated once per machine function. - BlockFrequency CSRCost; - - /// Enable or not the consideration of the cost of local intervals created - /// by a split candidate when choosing the best split candidate. - bool EnableAdvancedRASplitCost; - - /// Set of broken hints that may be reconciled later because of eviction. - SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; - - /// The register cost values. This list will be recreated for each Machine - /// Function - ArrayRef<uint8_t> RegCosts; - -public: - RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); - - /// Return the pass name. 
- StringRef getPassName() const override { return "Greedy Register Allocator"; } - - /// RAGreedy analysis usage. - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - Spiller &spiller() override { return *SpillerInstance; } - void enqueueImpl(LiveInterval *LI) override; - LiveInterval *dequeue() override; - MCRegister selectOrSplit(LiveInterval &, - SmallVectorImpl<Register> &) override; - void aboutToRemoveInterval(LiveInterval &) override; - - /// Perform register allocation. - bool runOnMachineFunction(MachineFunction &mf) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoPHIs); - } - - MachineFunctionProperties getClearedProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::IsSSA); - } - - static char ID; - -private: - MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned = 0); - - bool LRE_CanEraseVirtReg(Register) override; - void LRE_WillShrinkVirtReg(Register) override; - void LRE_DidCloneVirtReg(Register, Register) override; - void enqueue(PQueue &CurQueue, LiveInterval *LI); - LiveInterval *dequeue(PQueue &CurQueue); - - BlockFrequency calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); - bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); - bool growRegion(GlobalSplitCandidate &Cand); - bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order); - bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, - GlobalSplitCandidate &Cand, unsigned BBNumber, - const AllocationOrder &Order); - BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, - const AllocationOrder &Order, - bool *CanCauseEvictionChain); - bool calcCompactRegion(GlobalSplitCandidate&); - void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); - void calcGapWeights(MCRegister, SmallVectorImpl<float> &); - bool canEvictInterferenceInRange(const LiveInterval &VirtReg, - MCRegister PhysReg, SlotIndex Start, - SlotIndex End, EvictionCost &MaxCost) const; - MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, - const LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictWeight) const; - void evictInterference(LiveInterval &, MCRegister, - SmallVectorImpl<Register> &); - bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, - SmallLISet &RecoloringCandidates, - const SmallVirtRegSet &FixedRegisters); - - MCRegister tryAssign(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&, - const SmallVirtRegSet&); - MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, - uint8_t, const SmallVirtRegSet &) const; - MCRegister tryEvict(LiveInterval &, AllocationOrder &, - SmallVectorImpl<Register> &, uint8_t, - const SmallVirtRegSet &); - MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, - SmallVectorImpl<Register> &); - /// Calculate cost of region splitting. - unsigned calculateRegionSplitCost(LiveInterval &VirtReg, - AllocationOrder &Order, - BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR, - bool *CanCauseEvictionChain = nullptr); - /// Perform region splitting. 
- unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, - bool HasCompact, - SmallVectorImpl<Register> &NewVRegs); - /// Check other options before using a callee-saved register for the first - /// time. - MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, - AllocationOrder &Order, MCRegister PhysReg, - uint8_t &CostPerUseLimit, - SmallVectorImpl<Register> &NewVRegs); - void initializeCSRCost(); - unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&); - unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&); - unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&); - unsigned trySplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&, - const SmallVirtRegSet&); - unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, - SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned); - bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned); - void tryHintRecoloring(LiveInterval &); - void tryHintsRecoloring(); - - /// Model the information carried by one end of a copy. - struct HintInfo { - /// The frequency of the copy. - BlockFrequency Freq; - /// The virtual register or physical register. - Register Reg; - /// Its currently assigned register. - /// In case of a physical register Reg == PhysReg. - MCRegister PhysReg; - - HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg) - : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} - }; - using HintsInfo = SmallVector<HintInfo, 4>; - - BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); - void collectHintInfo(Register, HintsInfo &); - - /// Greedy RA statistic to remark. - struct RAGreedyStats { - unsigned Reloads = 0; - unsigned FoldedReloads = 0; - unsigned ZeroCostFoldedReloads = 0; - unsigned Spills = 0; - unsigned FoldedSpills = 0; - unsigned Copies = 0; - float ReloadsCost = 0.0f; - float FoldedReloadsCost = 0.0f; - float SpillsCost = 0.0f; - float FoldedSpillsCost = 0.0f; - float CopiesCost = 0.0f; - - bool isEmpty() { - return !(Reloads || FoldedReloads || Spills || FoldedSpills || - ZeroCostFoldedReloads || Copies); - } - - void add(RAGreedyStats other) { - Reloads += other.Reloads; - FoldedReloads += other.FoldedReloads; - ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; - Spills += other.Spills; - FoldedSpills += other.FoldedSpills; - Copies += other.Copies; - ReloadsCost += other.ReloadsCost; - FoldedReloadsCost += other.FoldedReloadsCost; - SpillsCost += other.SpillsCost; - FoldedSpillsCost += other.FoldedSpillsCost; - CopiesCost += other.CopiesCost; - } - - void report(MachineOptimizationRemarkMissed &R); - }; - - /// Compute statistic for a basic block. - RAGreedyStats computeStats(MachineBasicBlock &MBB); - - /// Compute and report statistic through a remark. - RAGreedyStats reportStats(MachineLoop *L); - - /// Report the statistic for each loop. - void reportStats(); -}; - -} // end anonymous namespace - char RAGreedy::ID = 0; char &llvm::RAGreedyID = RAGreedy::ID; @@ -613,7 +258,7 @@ void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { ExtraInfo->LRE_DidCloneVirtReg(New, Old); } -void ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) { +void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) { // Cloning a register we haven't even heard about yet? Just ignore it. 
if (!Info.inBounds(Old)) return; @@ -811,7 +456,7 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, LiveInterval &B, bool BreaksHint) const { - bool CanSplit = ExtraInfo->getStage(B) < RS_Spill; + bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill; // Be fairly aggressive about following hints as long as the evictee can be // split. @@ -852,7 +497,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; - bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg); // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny @@ -861,7 +506,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( // // This works out so a register without a cascade number is allowed to evict // anything, and it can be evicted by anything. - unsigned Cascade = ExtraInfo->getCascadeOrCurrentNext(VirtReg.reg()); + unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg()); EvictionCost Cost; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -883,7 +528,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( return false; // Never evict spill products. They cannot split or spill. - if (ExtraInfo->getStage(*Intf) == RS_Done) + if (RA.getExtraInfo().getStage(*Intf) == RS_Done) return false; // Once a live range becomes small enough, it is urgent that we find a // register for it. This is indicated by an infinite spill weight. These @@ -898,7 +543,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( RegClassInfo.getNumAllocatableRegs( MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade. - unsigned IntfCascade = ExtraInfo->getCascade(Intf->reg()); + unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); if (Cascade <= IntfCascade) { if (!Urgent) return false; @@ -1069,28 +714,20 @@ bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const { return !Matrix->isPhysRegUsed(PhysReg); } -MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( - LiveInterval &VirtReg, const AllocationOrder &Order, - uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { - // Keep track of the cheapest interference seen so far. - EvictionCost BestCost; - BestCost.setMax(); - MCRegister BestPhys; +Optional<unsigned> +RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg, + const AllocationOrder &Order, + unsigned CostPerUseLimit) const { unsigned OrderLimit = Order.getOrder().size(); - // When we are just looking for a reduced cost per use, don't break any - // hints, and only evict smaller spill weights. if (CostPerUseLimit < uint8_t(~0u)) { - BestCost.BrokenHints = 0; - BestCost.MaxWeight = VirtReg.weight(); - // Check of any registers in RC are below CostPerUseLimit. 
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg()); uint8_t MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); - return 0; + return None; } // It is normal for register classes to have a long tail of registers with @@ -1101,24 +738,50 @@ MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( << " regs.\n"); } } + return OrderLimit; +} + +bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit, + MCRegister PhysReg) const { + if (RegCosts[PhysReg] >= CostPerUseLimit) + return false; + // The first use of a callee-saved register in a function has cost 1. + // Don't start using a CSR when the CostPerUseLimit is low. + if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { + LLVM_DEBUG( + dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " + << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + << '\n'); + return false; + } + return true; +} + +MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( + LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { + // Keep track of the cheapest interference seen so far. + EvictionCost BestCost; + BestCost.setMax(); + MCRegister BestPhys; + auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit); + if (!MaybeOrderLimit) + return MCRegister::NoRegister; + unsigned OrderLimit = *MaybeOrderLimit; + + // When we are just looking for a reduced cost per use, don't break any + // hints, and only evict smaller spill weights. + if (CostPerUseLimit < uint8_t(~0u)) { + BestCost.BrokenHints = 0; + BestCost.MaxWeight = VirtReg.weight(); + } for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I) { MCRegister PhysReg = *I; assert(PhysReg); - if (RegCosts[PhysReg] >= CostPerUseLimit) - continue; - // The first use of a callee-saved register in a function has cost 1. - // Don't start using a CSR when the CostPerUseLimit is low. - if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { - LLVM_DEBUG( - dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " - << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) - << '\n'); - continue; - } - - if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost, + if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) || + !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost, FixedRegisters)) continue; @@ -3269,8 +2932,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); ExtraInfo.emplace(); - EvictAdvisor = getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor( - *MF, Matrix, LIS, VRM, RegClassInfo, &*ExtraInfo); + EvictAdvisor = + getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this); IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. 
SetOfBrokenHints.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h new file mode 100644 index 000000000000..e9a5fe635f26 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h @@ -0,0 +1,507 @@ +//==- RegAllocGreedy.h ------- greedy register allocator ----------*-C++-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_ +#define LLVM_CODEGEN_REGALLOCGREEDY_H_ + +#include "AllocationOrder.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" +#include "RegAllocBase.h" +#include "RegAllocEvictionAdvisor.h" +#include "SpillPlacement.h" +#include "SplitKit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/LiveStacks.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Spiller.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <queue> +#include <tuple> +#include <utility> + +namespace llvm { +class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + // Interface to eviction advisers +public: + /// Track allocation stage and eviction loop prevention during allocation. + class ExtraRegInfo final { + // RegInfo - Keep additional information about each live range. + struct RegInfo { + LiveRangeStage Stage = RS_New; + + // Cascade - Eviction loop prevention. See + // canEvictInterferenceBasedOnCost(). 
+ unsigned Cascade = 0; + + RegInfo() = default; + }; + + IndexedMap<RegInfo, VirtReg2IndexFunctor> Info; + unsigned NextCascade = 1; + + public: + ExtraRegInfo() = default; + ExtraRegInfo(const ExtraRegInfo &) = delete; + + LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; } + + LiveRangeStage getStage(const LiveInterval &VirtReg) const { + return getStage(VirtReg.reg()); + } + + void setStage(Register Reg, LiveRangeStage Stage) { + Info.grow(Reg.id()); + Info[Reg].Stage = Stage; + } + + void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + setStage(VirtReg.reg(), Stage); + } + + /// Return the current stage of the register, if present, otherwise + /// initialize it and return that. + LiveRangeStage getOrInitStage(Register Reg) { + Info.grow(Reg.id()); + return getStage(Reg); + } + + unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; } + + void setCascade(Register Reg, unsigned Cascade) { + Info.grow(Reg.id()); + Info[Reg].Cascade = Cascade; + } + + unsigned getOrAssignNewCascade(Register Reg) { + unsigned Cascade = getCascade(Reg); + if (!Cascade) { + Cascade = NextCascade++; + setCascade(Reg, Cascade); + } + return Cascade; + } + + unsigned getCascadeOrCurrentNext(Register Reg) const { + unsigned Cascade = getCascade(Reg); + if (!Cascade) + Cascade = NextCascade; + return Cascade; + } + + template <typename Iterator> + void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { + for (; Begin != End; ++Begin) { + Register Reg = *Begin; + Info.grow(Reg.id()); + if (Info[Reg].Stage == RS_New) + Info[Reg].Stage = NewStage; + } + } + void LRE_DidCloneVirtReg(Register New, Register Old); + }; + + LiveRegMatrix *getInterferenceMatrix() const { return Matrix; } + LiveIntervals *getLiveIntervals() const { return LIS; } + VirtRegMap *getVirtRegMap() const { return VRM; } + const RegisterClassInfo &getRegClassInfo() const { return RegClassInfo; } + const ExtraRegInfo &getExtraInfo() const { return *ExtraInfo; } + size_t getQueueSize() const { return Queue.size(); } + // end (interface to eviction advisers) + +private: + // Convenient shortcuts. + using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; + using SmallLISet = SmallPtrSet<LiveInterval *, 4>; + + // context + MachineFunction *MF; + + // Shortcuts to some useful interface. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + + // analyses + SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineOptimizationRemarkEmitter *ORE; + EdgeBundles *Bundles; + SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; + AliasAnalysis *AA; + + // state + std::unique_ptr<Spiller> SpillerInstance; + PQueue Queue; + std::unique_ptr<VirtRegAuxInfo> VRAI; + Optional<ExtraRegInfo> ExtraInfo; + std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor; + + // Enum CutOffStage to keep a track whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as bitmask. New value should be next power of 2. + enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + +#ifndef NDEBUG + static const char *const StageName[]; +#endif + + /// EvictionTrack - Keeps track of past evictions in order to optimize region + /// split decision. 
+ class EvictionTrack { + + public: + using EvictorInfo = + std::pair<Register /* evictor */, MCRegister /* physreg */>; + using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>; + + private: + /// Each Vreg that has been evicted in the last stage of selectOrSplit will + /// be mapped to the evictor Vreg and the PhysReg it was evicted from. + EvicteeInfo Evictees; + + public: + /// Clear all eviction information. + void clear() { Evictees.clear(); } + + /// Clear eviction information for the given evictee Vreg. + /// E.g. when Vreg get's a new allocation, the old eviction info is no + /// longer relevant. + /// \param Evictee The evictee Vreg for whom we want to clear collected + /// eviction info. + void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } + + /// Track new eviction. + /// The Evictor vreg has evicted the Evictee vreg from Physreg. + /// \param PhysReg The physical register Evictee was evicted from. + /// \param Evictor The evictor Vreg that evicted Evictee. + /// \param Evictee The evictee Vreg. + void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { + Evictees[Evictee].first = Evictor; + Evictees[Evictee].second = PhysReg; + } + + /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. + /// \param Evictee The evictee vreg. + /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if + /// nobody has evicted Evictee from PhysReg. + EvictorInfo getEvictor(Register Evictee) { + if (Evictees.count(Evictee)) { + return Evictees[Evictee]; + } + + return EvictorInfo(0, 0); + } + }; + + // Keeps track of past evictions in order to optimize region split decision. + EvictionTrack LastEvicted; + + // splitting state. + std::unique_ptr<SplitAnalysis> SA; + std::unique_ptr<SplitEditor> SE; + + /// Cached per-block interference maps + InterferenceCache IntfCache; + + /// All basic blocks where the current register has uses. + SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; + + /// Global live range splitting candidate info. + struct GlobalSplitCandidate { + // Register intended for assignment, or 0. + MCRegister PhysReg; + + // SplitKit interval index for this candidate. + unsigned IntvIdx; + + // Interference for PhysReg. + InterferenceCache::Cursor Intf; + + // Bundles where this candidate should be live. + BitVector LiveBundles; + SmallVector<unsigned, 8> ActiveBlocks; + + void reset(InterferenceCache &Cache, MCRegister Reg) { + PhysReg = Reg; + IntvIdx = 0; + Intf.setPhysReg(Cache, Reg); + LiveBundles.clear(); + ActiveBlocks.clear(); + } + + // Set B[I] = C for every live bundle where B[I] was NoCand. + unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { + unsigned Count = 0; + for (unsigned I : LiveBundles.set_bits()) + if (B[I] == NoCand) { + B[I] = C; + Count++; + } + return Count; + } + }; + + /// Candidate info for each PhysReg in AllocationOrder. + /// This vector never shrinks, but grows to the size of the largest register + /// class. + SmallVector<GlobalSplitCandidate, 32> GlobalCand; + + enum : unsigned { NoCand = ~0u }; + + /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to + /// NoCand which indicates the stack interval. + SmallVector<unsigned, 32> BundleCand; + + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + + /// Enable or not the consideration of the cost of local intervals created + /// by a split candidate when choosing the best split candidate. 
+ bool EnableAdvancedRASplitCost; + + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + + /// The register cost values. This list will be recreated for each Machine + /// Function + ArrayRef<uint8_t> RegCosts; + +public: + RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); + + /// Return the pass name. + StringRef getPassName() const override { return "Greedy Register Allocator"; } + + /// RAGreedy analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueueImpl(LiveInterval *LI) override; + LiveInterval *dequeue() override; + MCRegister selectOrSplit(LiveInterval &, + SmallVectorImpl<Register> &) override; + void aboutToRemoveInterval(LiveInterval &) override; + + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + static char ID; + +private: + MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &, + SmallVirtRegSet &, unsigned = 0); + + bool LRE_CanEraseVirtReg(Register) override; + void LRE_WillShrinkVirtReg(Register) override; + void LRE_DidCloneVirtReg(Register, Register) override; + void enqueue(PQueue &CurQueue, LiveInterval *LI); + LiveInterval *dequeue(PQueue &CurQueue); + + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &); + bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); + bool growRegion(GlobalSplitCandidate &Cand); + bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order); + bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, + GlobalSplitCandidate &Cand, unsigned BBNumber, + const AllocationOrder &Order); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, + const AllocationOrder &Order, + bool *CanCauseEvictionChain); + bool calcCompactRegion(GlobalSplitCandidate &); + void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>); + void calcGapWeights(MCRegister, SmallVectorImpl<float> &); + bool canEvictInterferenceInRange(const LiveInterval &VirtReg, + MCRegister PhysReg, SlotIndex Start, + SlotIndex End, EvictionCost &MaxCost) const; + MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, + const LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictWeight) const; + void evictInterference(LiveInterval &, MCRegister, + SmallVectorImpl<Register> &); + bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters); + + MCRegister tryAssign(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &, const SmallVirtRegSet &); + MCRegister tryEvict(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &, uint8_t, + const SmallVirtRegSet &); + MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &); + /// Calculate cost of region splitting. 
+ unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain = nullptr); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, SmallVectorImpl<Register> &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. + MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, MCRegister PhysReg, + uint8_t &CostPerUseLimit, + SmallVectorImpl<Register> &NewVRegs); + void initializeCSRCost(); + unsigned tryBlockSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &); + unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &); + unsigned tryLocalSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &); + unsigned trySplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &, const SmallVirtRegSet &); + unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &, + SmallVirtRegSet &, unsigned); + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &, + SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. + Register Reg; + /// Its currently assigned register. + /// In case of a physical register Reg == PhysReg. + MCRegister PhysReg; + + HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + using HintsInfo = SmallVector<HintInfo, 4>; + + BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); + void collectHintInfo(Register, HintsInfo &); + + /// Greedy RA statistic to remark. + struct RAGreedyStats { + unsigned Reloads = 0; + unsigned FoldedReloads = 0; + unsigned ZeroCostFoldedReloads = 0; + unsigned Spills = 0; + unsigned FoldedSpills = 0; + unsigned Copies = 0; + float ReloadsCost = 0.0f; + float FoldedReloadsCost = 0.0f; + float SpillsCost = 0.0f; + float FoldedSpillsCost = 0.0f; + float CopiesCost = 0.0f; + + bool isEmpty() { + return !(Reloads || FoldedReloads || Spills || FoldedSpills || + ZeroCostFoldedReloads || Copies); + } + + void add(RAGreedyStats other) { + Reloads += other.Reloads; + FoldedReloads += other.FoldedReloads; + ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; + Spills += other.Spills; + FoldedSpills += other.FoldedSpills; + Copies += other.Copies; + ReloadsCost += other.ReloadsCost; + FoldedReloadsCost += other.FoldedReloadsCost; + SpillsCost += other.SpillsCost; + FoldedSpillsCost += other.FoldedSpillsCost; + CopiesCost += other.CopiesCost; + } + + void report(MachineOptimizationRemarkMissed &R); + }; + + /// Compute statistic for a basic block. + RAGreedyStats computeStats(MachineBasicBlock &MBB); + + /// Compute and report statistic through a remark. + RAGreedyStats reportStats(MachineLoop *L); + + /// Report the statistic for each loop. 
+ void reportStats(); +}; +} // namespace llvm +#endif // #ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_ diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp index c0a07ec4c91d..424ad7419165 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -533,6 +533,22 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC, Candidates.reset(*AI); } + // If we have already scavenged some registers, remove them from the + // candidates. If we end up recursively calling eliminateFrameIndex, we don't + // want to be clobbering previously scavenged registers or their associated + // stack slots. + for (ScavengedInfo &SI : Scavenged) { + if (SI.Reg) { + if (isRegUsed(SI.Reg)) { + LLVM_DEBUG( + dbgs() << "Removing " << printReg(SI.Reg, TRI) << + " from scavenging candidates since it was already scavenged\n"); + for (MCRegAliasIterator AI(SI.Reg, TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + } + } + // Try to find a register that's unused if there is one, as then we won't // have to spill. BitVector Available = getRegsAvailable(RC); @@ -553,6 +569,12 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC, if (!AllowSpill) return 0; +#ifndef NDEBUG + for (ScavengedInfo &SI : Scavenged) { + assert(SI.Reg != SReg && "scavenged a previously scavenged register"); + } +#endif + ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); Scavenged.Restore = &*std::prev(UseMI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 6e05de888cc0..a61a2b2728fa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -30,8 +30,7 @@ using namespace llvm; ScoreboardHazardRecognizer::ScoreboardHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *SchedDAG, const char *ParentDebugType) - : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II), - DAG(SchedDAG) { + : DebugType(ParentDebugType), ItinData(II), DAG(SchedDAG) { (void)DebugType; // Determine the maximum depth of any itinerary. This determines the depth of // the scoreboard. 
We always make the scoreboard at least 1 cycle deep to diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 067ad819e0d2..932f263d2558 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -593,7 +593,7 @@ namespace { SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue mergeTruncStores(StoreSDNode *N); - SDValue ReduceLoadWidth(SDNode *N); + SDValue reduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); @@ -1070,7 +1070,7 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, return DAG.getNode(Opc, DL, VT, N00, OpNode); return SDValue(); } - if (N0.hasOneUse()) { + if (TLI.isReassocProfitable(DAG, N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) @@ -3058,9 +3058,8 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, // // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with // a single path for carry/borrow out propagation: -static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, - const TargetLowering &TLI, SDValue Carry0, - SDValue Carry1, SDNode *N) { +static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, + SDValue Carry0, SDValue Carry1, SDNode *N) { if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1) return SDValue(); unsigned Opcode = Carry0.getOpcode(); @@ -3908,7 +3907,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(nullptr, 0), Y(nullptr, 0); + SDValue Sh, Y; // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && @@ -4471,15 +4470,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { return FoldedVOp; // fold (mulhs x, 0) -> 0 - // do not return N0/N1, because undef node may exist. - if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || - ISD::isConstantSplatVectorAllZeros(N1.getNode())) + // do not return N1, because undef node may exist. + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return DAG.getConstant(0, DL, VT); } // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, @@ -4531,18 +4530,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return FoldedVOp; // fold (mulhu x, 0) -> 0 - // do not return N0/N1, because undef node may exist. - if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || - ISD::isConstantSplatVectorAllZeros(N1.getNode())) + // do not return N1, because undef node may exist. 
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return DAG.getConstant(0, DL, VT); } // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; + // fold (mulhu x, 1) -> 0 if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); + // fold (mulhu x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -4892,6 +4892,42 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); } +static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + SelectionDAG &DAG) { + // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a + // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may + // be truncated versions of the the setcc (N0/N1). + if ((N0 != N2 && + (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) || + N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT) + return SDValue(); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + ConstantSDNode *N3C = isConstOrConstSplat(N3); + if (!N1C || !N3C) + return SDValue(); + const APInt &C1 = N1C->getAPIntValue(); + const APInt &C3 = N3C->getAPIntValue(); + if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() || + C1 != C3.zextOrSelf(C1.getBitWidth())) + return SDValue(); + + unsigned BW = (C1 + 1).exactLogBase2(); + EVT FPVT = N0.getOperand(0).getValueType(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW); + if (FPVT.isVector()) + NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, + FPVT.getVectorElementCount()); + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT, + FPVT, NewVT)) + return SDValue(); + + SDValue Sat = + DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0), + DAG.getValueType(NewVT.getScalarType())); + return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType()); +} + SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4934,6 +4970,9 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue S = PerformMinMaxFpToSatCombine( N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG)) return S; + if (Opcode == ISD::UMIN) + if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG)) + return S; // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) @@ -5491,6 +5530,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Some constants may need fixing up later if they are too large. if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + if (Mask->getValueType(0) != C->getValueType(0)) + return false; if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) NodesWithConsts.insert(N); @@ -5524,9 +5565,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, case ISD::AssertZext: { unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOpcode() == ISD::AssertZext ? - cast<VTSDNode>(Op.getOperand(1))->getVT() : - Op.getOperand(0).getValueType(); + EVT VT = Op.getOpcode() == ISD::AssertZext + ? cast<VTSDNode>(Op.getOperand(1))->getVT() + : Op.getOperand(0).getValueType(); // We can accept extending nodes if the mask is wider or an equal // width to the original type. 
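The new PerformUMinFpToSatCombine above matches umin(fptoui(x), 2^n - 1) and rewrites it as an n-bit FP_TO_UINT_SAT. A minimal standalone sketch of the scalar identity this relies on (plain C++, not LLVM code; fp_to_uint_sat is a hypothetical reference model of the saturating conversion, assumed to clamp to [0, 2^n - 1] and map NaN to 0):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// Hypothetical reference model of an n-bit saturating fp-to-unsigned
// conversion: NaN maps to 0, everything else is clamped to [0, 2^BW - 1].
static uint64_t fp_to_uint_sat(double X, unsigned BW) {
  const uint64_t Max = (BW >= 64) ? UINT64_MAX : ((1ULL << BW) - 1);
  if (std::isnan(X) || X <= 0.0)
    return 0;
  if (X >= static_cast<double>(Max))
    return Max;
  return static_cast<uint64_t>(X);
}

int main() {
  const unsigned BW = 8; // i.e. the pattern umin(fptoui(x), 255)
  for (double X : {0.0, 1.5, 200.0, 255.0, 1000.0}) {
    // The pattern the combine matches, checked only for inputs where the
    // unsaturated conversion is well defined (non-negative, fits in range).
    uint64_t Unsat =
        std::min<uint64_t>(static_cast<uint64_t>(X), (1ULL << BW) - 1);
    assert(Unsat == fp_to_uint_sat(X, BW));
  }
  return 0;
}

For negative or NaN inputs the unsaturated fptoui is already undefined, which is why the saturating replacement is free to pick the clamped value there.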
@@ -5534,6 +5575,15 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, continue; break; } + case ISD::ANY_EXTEND: { + unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT VT = Op.getOperand(0).getValueType(); + if (ExtVT.bitsGE(VT)) + break; + // Fallthrough to searching for nodes from the operands of the extend. + LLVM_FALLTHROUGH; + } case ISD::OR: case ISD::XOR: case ISD::AND: @@ -5593,12 +5643,14 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { // masking. if (FixupNode) { LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); - SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), - FixupNode->getValueType(0), - SDValue(FixupNode, 0), MaskOp); + SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode), + FixupNode->getValueType(0)); + SDValue And = + DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOpT); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); if (And.getOpcode() == ISD ::AND) - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT); } // Narrow any constants that need it. @@ -5607,10 +5659,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { SDValue Op1 = LogicN->getOperand(1); if (isa<ConstantSDNode>(Op0)) - std::swap(Op0, Op1); + std::swap(Op0, Op1); - SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), - Op1, MaskOp); + SDValue MaskOpT = + DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType()); + SDValue And = + DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT); DAG.UpdateNodeOperands(LogicN, Op0, And); } @@ -5618,13 +5672,15 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { // Create narrow loads. 
for (auto *Load : Loads) { LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); + SDValue MaskOpT = + DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0)); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), - SDValue(Load, 0), MaskOp); + SDValue(Load, 0), MaskOpT); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); if (And.getOpcode() == ISD ::AND) And = SDValue( - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); - SDValue NewLoad = ReduceLoadWidth(And.getNode()); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0); + SDValue NewLoad = reduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); CombineTo(Load, NewLoad, NewLoad.getValue(1)); @@ -5799,18 +5855,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return FoldedVOp; // fold (and x, 0) -> 0, vector edition - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - // do not return N0, because undef node may exist in N0 - return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()), - SDLoc(N), N0.getValueType()); if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition - if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) - return N1; if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) return N0; @@ -5862,7 +5912,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; - if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; // fold (and (or x, C), D) -> D if (C & D) == D @@ -6024,7 +6074,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || (N0.getOpcode() == ISD::ANY_EXTEND && N0.getOperand(0).getOpcode() == ISD::LOAD))) { - if (SDValue Res = ReduceLoadWidth(N)) { + if (SDValue Res = reduceLoadWidth(N)) { LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); AddToWorklist(N); @@ -6659,7 +6709,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue Combined = visitORLike(N0, N1, N)) return Combined; - if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) @@ -8156,7 +8206,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; return SDValue(); @@ -8948,6 +8998,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) return MULH; + // Attempt to convert a sra of a load into a narrower sign-extending load. + if (SDValue NarrowLoad = reduceLoadWidth(N)) + return NarrowLoad; + return SDValue(); } @@ -9140,7 +9194,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. - if (SDValue NarrowLoad = ReduceLoadWidth(N)) + if (SDValue NarrowLoad = reduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. 
We want to optimize: @@ -9358,6 +9412,17 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) return N0->getOperand(0); + + // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse + // isn't supported, it will be expanded to bswap followed by a manual reversal + // of bits in each byte. By placing bswaps before bitreverse, we can remove + // the two bswaps if the bitreverse gets expanded. + if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) { + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap); + } + return SDValue(); } @@ -10288,6 +10353,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) return S; + if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) + return S; // If this select has a condition (setcc) with narrower operands than the // select, try to widen the compare to match the select width. @@ -11357,7 +11424,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) - if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -11621,7 +11688,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) - if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -11864,7 +11931,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -12095,13 +12162,10 @@ SDValue DAGCombiner::visitAssertAlign(SDNode *N) { return SDValue(); } -/// If the result of a wider load is shifted to right of N bits and then -/// truncated to a narrower type and where N is a multiple of number of bits of -/// the narrower type, transform it to a narrower load from address + N / num of -/// bits of new type. Also narrow the load if the result is masked with an AND -/// to effectively produce a smaller type. If the result is to be extended, also -/// fold the extension to form a extending load. -SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { +/// If the result of a load is shifted/masked/truncated to an effectively +/// narrower type, try to transform the load to a narrower type and/or +/// use an extending load. 
+SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; @@ -12113,32 +12177,48 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (VT.isVector()) return SDValue(); + // The ShAmt variable is used to indicate that we've consumed a right + // shift. I.e. we want to narrow the width of the load by skipping to load the + // ShAmt least significant bits. unsigned ShAmt = 0; + // A special case is when the least significant bits from the load are masked + // away, but using an AND rather than a right shift. HasShiftedOffset is used + // to indicate that the narrowed load should be left-shifted ShAmt bits to get + // the result. bool HasShiftedOffset = false; // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - } else if (Opc == ISD::SRL) { - // Another special-case: SRL is basically zero-extending a narrower value, - // or it maybe shifting a higher subword, half or byte into the lowest - // bits. - ExtType = ISD::ZEXTLOAD; - N0 = SDValue(N, 0); + } else if (Opc == ISD::SRL || Opc == ISD::SRA) { + // Another special-case: SRL/SRA is basically zero/sign-extending a narrower + // value, or it may be shifting a higher subword, half or byte into the + // lowest bits. - auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0)); - auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!N01 || !LN0) + // Only handle shift with constant shift amount, and the shiftee must be a + // load. + auto *LN = dyn_cast<LoadSDNode>(N0); + auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!N1C || !LN) + return SDValue(); + // If the shift amount is larger than the memory type then we're not + // accessing any of the loaded bytes. + ShAmt = N1C->getZExtValue(); + uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits(); + if (MemoryWidth <= ShAmt) + return SDValue(); + // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD. + ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD; + ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); + // If original load is a SEXTLOAD then we can't simply replace it by a + // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD + // followed by a ZEXT, but that is not handled at the moment). Similarly if + // the original load is a ZEXTLOAD and we want to use a SEXTLOAD. + if ((LN->getExtensionType() == ISD::SEXTLOAD || + LN->getExtensionType() == ISD::ZEXTLOAD) && + LN->getExtensionType() != ExtType) return SDValue(); - - uint64_t ShiftAmt = N01->getZExtValue(); - uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits(); - if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt) - ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt); - else - ExtVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getScalarSizeInBits() - ShiftAmt); } else if (Opc == ISD::AND) { // An AND with a constant mask is the same as a truncate + zero-extend. 
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1)); @@ -12161,55 +12241,80 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); } - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { - SDValue SRL = N0; - if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { - ShAmt = ConstShift->getZExtValue(); - unsigned EVTBits = ExtVT.getScalarSizeInBits(); - // Is the shift amount a multiple of size of VT? - if ((ShAmt & (EVTBits-1)) == 0) { - N0 = N0.getOperand(0); - // Is the load width a multiple of size of VT? - if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0) - return SDValue(); - } + // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing + // a right shift. Here we redo some of those checks, to possibly adjust the + // ExtVT even further based on "a masking AND". We could also end up here for + // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks + // need to be done here as well. + if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) { + SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0; + // Bail out when the SRL has more than one use. This is done for historical + // (undocumented) reasons. Maybe intent was to guard the AND-masking below + // check below? And maybe it could be non-profitable to do the transform in + // case the SRL has multiple uses and we get here with Opc!=ISD::SRL? + // FIXME: Can't we just skip this check for the Opc==ISD::SRL case. + if (!SRL.hasOneUse()) + return SDValue(); + + // Only handle shift with constant shift amount, and the shiftee must be a + // load. + auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0)); + auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1)); + if (!SRL1C || !LN) + return SDValue(); - // At this point, we must have a load or else we can't do the transform. - auto *LN0 = dyn_cast<LoadSDNode>(N0); - if (!LN0) return SDValue(); + // If the shift amount is larger than the input type then we're not + // accessing any of the loaded bytes. If the load was a zextload/extload + // then the result of the shift+trunc is zero/undef (handled elsewhere). + ShAmt = SRL1C->getZExtValue(); + uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits(); + if (ShAmt >= MemoryWidth) + return SDValue(); - // Because a SRL must be assumed to *need* to zero-extend the high bits - // (as opposed to anyext the high bits), we can't combine the zextload - // lowering of SRL and an sextload. - if (LN0->getExtensionType() == ISD::SEXTLOAD) - return SDValue(); + // Because a SRL must be assumed to *need* to zero-extend the high bits + // (as opposed to anyext the high bits), we can't combine the zextload + // lowering of SRL and an sextload. + if (LN->getExtensionType() == ISD::SEXTLOAD) + return SDValue(); - // If the shift amount is larger than the input type then we're not - // accessing any of the loaded bytes. If the load was a zextload/extload - // then the result of the shift+trunc is zero/undef (handled elsewhere). - if (ShAmt >= LN0->getMemoryVT().getSizeInBits()) + // Avoid reading outside the memory accessed by the original load (could + // happened if we only adjust the load base pointer by ShAmt). Instead we + // try to narrow the load even further. The typical scenario here is: + // (i64 (truncate (i96 (srl (load x), 64)))) -> + // (i64 (truncate (i96 (zextload (load i32 + offset) from i32)))) + if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) { + // Don't replace sextload by zextload. 
+ if (ExtType == ISD::SEXTLOAD) return SDValue(); - - // If the SRL is only used by a masking AND, we may be able to adjust - // the ExtVT to make the AND redundant. - SDNode *Mask = *(SRL->use_begin()); - if (Mask->getOpcode() == ISD::AND && - isa<ConstantSDNode>(Mask->getOperand(1))) { - const APInt& ShiftMask = Mask->getConstantOperandAPInt(1); - if (ShiftMask.isMask()) { - EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), - ShiftMask.countTrailingOnes()); - // If the mask is smaller, recompute the type. - if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) && - TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)) - ExtVT = MaskedVT; - } + // Narrow the load. + ExtType = ISD::ZEXTLOAD; + ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); + } + + // If the SRL is only used by a masking AND, we may be able to adjust + // the ExtVT to make the AND redundant. + SDNode *Mask = *(SRL->use_begin()); + if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND && + isa<ConstantSDNode>(Mask->getOperand(1))) { + const APInt& ShiftMask = Mask->getConstantOperandAPInt(1); + if (ShiftMask.isMask()) { + EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), + ShiftMask.countTrailingOnes()); + // If the mask is smaller, recompute the type. + if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) && + TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) + ExtVT = MaskedVT; } } + + N0 = SRL.getOperand(0); } - // If the load is shifted left (and the result isn't shifted back right), - // we can fold the truncate through the shift. + // If the load is shifted left (and the result isn't shifted back right), we + // can fold a truncate through the shift. The typical scenario is that N + // points at a TRUNCATE here so the attempted fold is: + // (truncate (shl (load x), c))) -> (shl (narrow load x), c) + // ShLeftAmt will indicate how much a narrowed load should be shifted left. unsigned ShLeftAmt = 0; if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { @@ -12237,12 +12342,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return LVTStoreBits - EVTStoreBits - ShAmt; }; - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. - if (DAG.getDataLayout().isBigEndian()) - ShAmt = AdjustBigEndianShift(ShAmt); + // We need to adjust the pointer to the load by ShAmt bits in order to load + // the correct bytes. + unsigned PtrAdjustmentInBits = + DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt; - uint64_t PtrOff = ShAmt / 8; + uint64_t PtrOff = PtrAdjustmentInBits / 8; Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. @@ -12285,11 +12390,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } if (HasShiftedOffset) { - // Recalculate the shift amount after it has been altered to calculate - // the offset. - if (DAG.getDataLayout().isBigEndian()) - ShAmt = AdjustBigEndianShift(ShAmt); - // We're using a shifted mask, so the load now has an offset. This means // that data has been loaded into the lower bytes than it would have been // before, so we need to shl the loaded data into the correct position in the @@ -12320,7 +12420,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
- if (ExtVTBits >= DAG.ComputeMinSignedBits(N0)) + if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12336,7 +12436,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) && + if ((N00Bits <= ExtVTBits || + DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -12355,7 +12456,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || - DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) && + DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12381,7 +12482,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (load x)) -> (smaller sextload x) // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) - if (SDValue NarrowLoad = ReduceLoadWidth(N)) + if (SDValue NarrowLoad = reduceLoadWidth(N)) return NarrowLoad; // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) @@ -12668,7 +12769,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { - if (SDValue Reduced = ReduceLoadWidth(N)) + if (SDValue Reduced = reduceLoadWidth(N)) return Reduced; // Handle the case where the load remains an extending load even @@ -17491,6 +17592,10 @@ void DAGCombiner::getStoreMergeCandidates( for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) TryToAddCandidate(I2); } + // Check stores that depend on the root (e.g. Store 3 in the chart above). + if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) { + TryToAddCandidate(I); + } } } else { for (auto I = RootNode->use_begin(), E = RootNode->use_end(); @@ -18351,6 +18456,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Value.getValueType().isInteger() && (!isa<ConstantSDNode>(Value) || !cast<ConstantSDNode>(Value)->isOpaque())) { + // Convert a truncating store of a extension into a standard store. 
+ if ((Value.getOpcode() == ISD::ZERO_EXTEND || + Value.getOpcode() == ISD::SIGN_EXTEND || + Value.getOpcode() == ISD::ANY_EXTEND) && + Value.getOperand(0).getValueType() == ST->getMemoryVT() && + TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT())) + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getMemOperand()); + APInt TruncDemandedBits = APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits()); @@ -23299,6 +23413,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG)) return S; + if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG)) + return S; return SDValue(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 4d1449bc2751..bfde35935c7b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1775,12 +1775,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return false; case Instruction::Call: - // On AIX, call lowering uses the DAG-ISEL path currently so that the + // On AIX, normal call lowering uses the DAG-ISEL path currently so that the // callee of the direct function call instruction will be mapped to the // symbol for the function's entry point, which is distinct from the // function descriptor symbol. The latter is the symbol whose XCOFF symbol // name is the C-linkage name of the source level function. - if (TM.getTargetTriple().isOSAIX()) + // But fast isel still has the ability to do selection for intrinsics. + if (TM.getTargetTriple().isOSAIX() && !isa<IntrinsicInst>(I)) return false; return selectCall(I); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5dfb65ef131a..54481b94fdd8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3593,9 +3593,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SETCC node. - if (Tmp3.getNode()) - Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), - Tmp1, Tmp2, Tmp3, Node->getFlags()); + if (Tmp3.getNode()) { + if (IsStrict) { + Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(), + {Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags()); + Chain = Tmp1.getValue(1); + } else { + Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Node->getFlags()); + } + } // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. 
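The LegalizeDAG change above rebuilds an expanded SETCC with the node's own opcode so that the strict variants keep their chain result. The expansion itself leans on two predicate identities: swap the operands while using the swapped condition code, or compute the inverse condition and negate afterwards. A minimal standalone sketch checking both identities (plain C++, not LLVM code; Pred, cmp, swapped and inverse are illustrative names, not LLVM APIs):

#include <cassert>

enum Pred { LT, GT, LE, GE };

static bool cmp(int A, int B, Pred P) {
  switch (P) {
  case LT: return A < B;
  case GT: return A > B;
  case LE: return A <= B;
  case GE: return A >= B;
  }
  return false;
}

// cmp(A, B, P) == cmp(B, A, swapped(P))
static Pred swapped(Pred P) {
  switch (P) {
  case LT: return GT;
  case GT: return LT;
  case LE: return GE;
  case GE: return LE;
  }
  return P;
}

// cmp(A, B, P) == !cmp(A, B, inverse(P))
static Pred inverse(Pred P) {
  switch (P) {
  case LT: return GE;
  case GE: return LT;
  case LE: return GT;
  case GT: return LE;
  }
  return P;
}

int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B)
      for (Pred P : {LT, GT, LE, GE}) {
        assert(cmp(A, B, P) == cmp(B, A, swapped(P)));
        assert(cmp(A, B, P) == !cmp(A, B, inverse(P)));
      }
  return 0;
}

The same reasoning carries over to the strict floating-point nodes, except that the rebuilt node must also forward the incoming chain, which is what the IsStrict branch in the hunk above does.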
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27f9cede1922..6bf38d7296a8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1193,7 +1193,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 518e525e13d0..8c7b90b6cd33 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -75,30 +75,28 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { break; case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); break; - case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; - case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; + case ISD::SELECT: + case ISD::VSELECT: + case ISD::VP_SELECT: + Res = PromoteIntRes_Select(N); + break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: - Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); - break; + case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UMIN: case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break; case ISD::SHL: - Res = PromoteIntRes_SHL(N, /*IsVP*/ false); - break; + case ISD::VP_SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; case ISD::SRA: - Res = PromoteIntRes_SRA(N, /*IsVP*/ false); - break; + case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break; case ISD::SRL: - Res = PromoteIntRes_SRL(N, /*IsVP*/ false); - break; + case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break; case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; @@ -154,18 +152,22 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::SUB: case ISD::MUL: - Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false); - break; + case ISD::VP_AND: + case ISD::VP_OR: + case ISD::VP_XOR: + case ISD::VP_ADD: + case ISD::VP_SUB: + case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SDIV: case ISD::SREM: - Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); - break; + case ISD::VP_SDIV: + case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UDIV: case ISD::UREM: - Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false); - break; + case ISD::VP_UDIV: + case ISD::VP_UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -260,32 +262,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res 
= PromoteIntRes_FunnelShift(N); break; - - case ISD::VP_AND: - case ISD::VP_OR: - case ISD::VP_XOR: - case ISD::VP_ADD: - case ISD::VP_SUB: - case ISD::VP_MUL: - Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true); - break; - case ISD::VP_SDIV: - case ISD::VP_SREM: - Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true); - break; - case ISD::VP_UDIV: - case ISD::VP_UREM: - Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true); - break; - case ISD::VP_SHL: - Res = PromoteIntRes_SHL(N, /*IsVP*/ true); - break; - case ISD::VP_ASHR: - Res = PromoteIntRes_SRA(N, /*IsVP*/ true); - break; - case ISD::VP_LSHR: - Res = PromoteIntRes_SRL(N, /*IsVP*/ true); - break; } // If the result is null then the sub-method took care of registering it. @@ -1127,20 +1103,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { - SDValue LHS = GetPromotedInteger(N->getOperand(1)); - SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getSelect(SDLoc(N), - LHS.getValueType(), N->getOperand(0), LHS, RHS); -} - -SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) { SDValue Mask = N->getOperand(0); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::VSELECT, SDLoc(N), - LHS.getValueType(), Mask, LHS, RHS); + + unsigned Opcode = N->getOpcode(); + return Opcode == ISD::VP_SELECT + ? DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS, + N->getOperand(3)) + : DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, + RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { @@ -1193,12 +1167,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { return DAG.getSExtOrTrunc(SetCC, dl, NVT); } -SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - if (!IsVP) + if (N->getOpcode() != ISD::VP_SHL) return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, N->getOperand(2), N->getOperand(3)); @@ -1210,34 +1184,40 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { Op.getValueType(), Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { // The input may have strange things in the top bits of the registers, but // these operations don't care. They may have weird bits going out, but // that too is okay if they are integer operations. 
SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); - if (!IsVP) + if (N->getNumOperands() == 2) return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - if (!IsVP) + if (N->getNumOperands() == 2) return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { // Zero extend the input. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - if (!IsVP) + if (N->getNumOperands() == 2) return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, N->getOperand(2), N->getOperand(3)); } @@ -1251,25 +1231,25 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { // The input value must be properly sign extended. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - if (!IsVP) + if (N->getOpcode() != ISD::VP_ASHR) return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. 
SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - if (!IsVP) + if (N->getOpcode() != ISD::VP_LSHR) return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, N->getOperand(2), N->getOperand(3)); @@ -1653,7 +1633,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::UDIVFIX: case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break; - case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; + case ISD::FPOWI: + case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1703,50 +1684,64 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { /// PromoteSetCCOperands - Promote the operands of a comparison. This code is /// shared among BR_CC, SELECT_CC, and SETCC handlers. -void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, ISD::CondCode CCCode) { // We have to insert explicit sign or zero extends. Note that we could // insert sign extends for ALL conditions. For those operations where either - // zero or sign extension would be valid, use SExtOrZExtPromotedInteger - // which will choose the cheapest for the target. - switch (CCCode) { - default: llvm_unreachable("Unknown integer comparison!"); - case ISD::SETEQ: - case ISD::SETNE: { - SDValue OpL = GetPromotedInteger(NewLHS); - SDValue OpR = GetPromotedInteger(NewRHS); - - // We would prefer to promote the comparison operand with sign extension. - // If the width of OpL/OpR excluding the duplicated sign bits is no greater - // than the width of NewLHS/NewRH, we can avoid inserting real truncate - // instruction, which is redundant eventually. - unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); - unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); - if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && - OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { - NewLHS = OpL; - NewRHS = OpR; - } else { - NewLHS = SExtOrZExtPromotedInteger(NewLHS); - NewRHS = SExtOrZExtPromotedInteger(NewRHS); + // zero or sign extension would be valid, we ask the target which extension + // it would prefer. + + // Signed comparisons always require sign extension. + if (ISD::isSignedIntSetCC(CCCode)) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + return; + } + + assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) && + "Unknown integer comparison!"); + + SDValue OpL = GetPromotedInteger(LHS); + SDValue OpR = GetPromotedInteger(RHS); + + if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) { + // The target would prefer to promote the comparison operand with sign + // extension. Honor that unless the promoted values are already zero + // extended. + unsigned OpLEffectiveBits = + DAG.computeKnownBits(OpL).countMaxActiveBits(); + unsigned OpREffectiveBits = + DAG.computeKnownBits(OpR).countMaxActiveBits(); + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; + return; } - break; + + // The promoted values aren't zero extended, use a sext_inreg. 
+ LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + return; } - case ISD::SETUGE: - case ISD::SETUGT: - case ISD::SETULE: - case ISD::SETULT: - NewLHS = SExtOrZExtPromotedInteger(NewLHS); - NewRHS = SExtOrZExtPromotedInteger(NewRHS); - break; - case ISD::SETGE: - case ISD::SETGT: - case ISD::SETLT: - case ISD::SETLE: - NewLHS = SExtPromotedInteger(NewLHS); - NewRHS = SExtPromotedInteger(NewRHS); - break; + + // Prefer to promote the comparison operand with zero extension. + + // If the width of OpL/OpR excluding the duplicated sign bits is no greater + // than the width of LHS/RHS, we can avoid/ inserting a zext_inreg operation + // that we might not be able to remove. + unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(OpR); + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; + return; } + + // Otherwise, use zext_inreg. + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); } SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { @@ -2099,8 +2094,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { } SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { - // FIXME: Support for promotion of STRICT_FPOWI is not implemented yet. - assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet."); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // The integer operand is the last operand in FPOWI (so the result and // floating point operand is already type legalized). @@ -2118,17 +2113,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { DAG.getContext()->emitError("Don't know how to promote fpowi to fpow"); return DAG.getUNDEF(N->getValueType(0)); } + unsigned OpOffset = IsStrict ? 1 : 0; // The exponent should fit in a sizeof(int) type for the libcall to be valid. 
assert(DAG.getLibInfo().getIntSize() == - N->getOperand(1).getValueType().getSizeInBits() && + N->getOperand(1 + OpOffset).getValueType().getSizeInBits() && "POWI exponent should match with sizeof(int) when doing the libcall."); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - std::pair<SDValue, SDValue> Tmp = - TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, - CallOptions, SDLoc(N), SDValue()); + SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)}; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall( + DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); ReplaceValueWith(SDValue(N, 0), Tmp.first); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); return SDValue(); } @@ -2255,7 +2252,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break; case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; - case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index da282ecad282..4d8daa82d8c0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -334,18 +334,17 @@ private: SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_SELECT(SDNode *N); - SDValue PromoteIntRes_VSELECT(SDNode *N); + SDValue PromoteIntRes_Select(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); - SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP); - SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP); - SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP); - SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); SDValue PromoteIntRes_UMINUMAX(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); - SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP); - SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); @@ -819,6 +818,12 @@ private: void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + /// Split mask operator of a VP intrinsic. + std::pair<SDValue, SDValue> SplitMask(SDValue Mask); + + /// Split mask operator of a VP intrinsic in a given location. + std::pair<SDValue, SDValue> SplitMask(SDValue Mask, const SDLoc &DL); + // Helper function for incrementing the pointer when splitting // memory operations void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, @@ -826,7 +831,7 @@ private: // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
void SplitVectorResult(SDNode *N, unsigned ResNo); - void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -847,8 +852,10 @@ private: void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); + void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi, + bool SplitSETCC = false); void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -864,6 +871,7 @@ private: SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N); + SDValue SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); SDValue SplitVecOp_TruncateHelper(SDNode *N); @@ -873,9 +881,10 @@ private: SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); - SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); - SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo); + SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo); + SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); @@ -900,6 +909,23 @@ private: } void SetWidenedVector(SDValue Op, SDValue Result); + /// Given a mask Mask, returns the larger vector into which Mask was widened. + SDValue GetWidenedMask(SDValue Mask, ElementCount EC) { + // For VP operations, we must also widen the mask. Note that the mask type + // may not actually need widening, leading it be split along with the VP + // operation. + // FIXME: This could lead to an infinite split/widen loop. We only handle + // the case where the mask needs widening to an identically-sized type as + // the vector inputs. + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen binary VP op"); + Mask = GetWidenedVector(Mask); + assert(Mask.getValueType().getVectorElementCount() == EC && + "Unable to widen binary VP op"); + return Mask; + } + // Widen Vector Result Promotion. 
void WidenVectorResult(SDNode *N, unsigned ResNo); SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo); @@ -911,10 +937,12 @@ private: SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); + SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N); SDValue WidenVecRes_ScalarOp(SDNode* N); - SDValue WidenVecRes_SELECT(SDNode* N); + SDValue WidenVecRes_Select(SDNode *N); SDValue WidenVSELECTMask(SDNode *N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); @@ -923,7 +951,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_Ternary(SDNode *N); - SDValue WidenVecRes_Binary(SDNode *N, bool IsVP); + SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); @@ -945,9 +973,11 @@ private: SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); @@ -957,6 +987,7 @@ private: SDValue WidenVecOp_FCOPYSIGN(SDNode *N); SDValue WidenVecOp_VECREDUCE(SDNode *N); SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N); + SDValue WidenVecOp_VP_REDUCE(SDNode *N); /// Helper function to generate a set of operations to perform /// a vector operation for a wider type. 
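The SplitMask and SplitEVL-based helpers declared in the hunk above centralize mask and explicit-vector-length splitting that the individual splitters previously open-coded. As a hedged illustration only (plain integers rather than SDValues; splitEVL below is a hypothetical stand-in for what SelectionDAG::SplitEVL computes), the EVL of a split VP operation decomposes as a umin/usubsat pair against the half element count:

    // Hypothetical scalar model of EVL splitting; not LLVM source.
    #include <algorithm>
    #include <cstdint>
    #include <utility>

    std::pair<uint64_t, uint64_t> splitEVL(uint64_t EVL, uint64_t HalfNumElts) {
      uint64_t Lo = std::min(EVL, HalfNumElts);                // umin(%evl, %halfnumelts)
      uint64_t Hi = EVL > HalfNumElts ? EVL - HalfNumElts : 0; // usubsat(%evl, %halfnumelts)
      return {Lo, Hi};
    }

For example, splitting an 8-element VP operation with EVL = 5 gives the low half an EVL of 4 and the high half an EVL of 1.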
@@ -1023,7 +1054,7 @@ private: void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 3d3c9a2ad837..c6885677d644 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -506,9 +506,10 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, GetSplitOp(Op, Lo, Hi); } -void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; SDLoc dl(N); + unsigned Opcode = N->getOpcode(); GetSplitOp(N->getOperand(1), LL, LH); GetSplitOp(N->getOperand(2), RL, RH); @@ -539,8 +540,18 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } - Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); - Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH); + if (Opcode != ISD::VP_SELECT && Opcode != ISD::VP_MERGE) { + Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL); + Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH); + return; + } + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl); + + Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL, EVLLo); + Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi); } void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 1493f36fcd3e..abf6a3ac6916 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -133,6 +133,8 @@ class VectorLegalizer { /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. SDValue ExpandVSELECT(SDNode *Node); + SDValue ExpandVP_SELECT(SDNode *Node); + SDValue ExpandVP_MERGE(SDNode *Node); SDValue ExpandSELECT(SDNode *Node); std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); SDValue ExpandStore(SDNode *N); @@ -457,6 +459,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; } + +#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ + case ISD::VPID: { \ + EVT LegalizeVT = LEGALPOS < 0 ? 
Node->getValueType(-(1 + LEGALPOS)) \ + : Node->getOperand(LEGALPOS).getValueType(); \ + Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ + } break; +#include "llvm/IR/VPIntrinsics.def" } LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); @@ -718,6 +728,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::VSELECT: Results.push_back(ExpandVSELECT(Node)); return; + case ISD::VP_SELECT: + Results.push_back(ExpandVP_SELECT(Node)); + return; case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; @@ -865,6 +878,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::UREM: ExpandREM(Node, Results); return; + case ISD::VP_MERGE: + Results.push_back(ExpandVP_MERGE(Node)); + return; } Results.push_back(DAG.UnrollVectorOp(Node)); @@ -1195,6 +1211,79 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } +SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { + // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which + // do not support it natively. + SDLoc DL(Node); + + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); + SDValue EVL = Node->getOperand(3); + + EVT VT = Mask.getValueType(); + + // If we can't even use the basic vector operations of + // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. + if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Node); + + // This operation also isn't safe when the operands aren't also booleans. + if (Op1.getValueType().getVectorElementType() != MVT::i1) + return DAG.UnrollVectorOp(Node); + + SDValue Ones = DAG.getAllOnesConstant(DL, VT); + SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL); + + Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL); + Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL); + return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL); +} + +SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { + // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector + // indices less than the EVL/pivot are true. Combine that with the original + // mask for a full-length mask. Use a full-length VSELECT to select between + // the true and false values. + SDLoc DL(Node); + + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); + SDValue EVL = Node->getOperand(3); + + EVT MaskVT = Mask.getValueType(); + bool IsFixedLen = MaskVT.isFixedLengthVector(); + + EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(), + MaskVT.getVectorElementCount()); + + // If we can't construct the EVL mask efficiently, it's better to unroll. + if ((IsFixedLen && + !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) || + (!IsFixedLen && + (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) || + !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT)))) + return DAG.UnrollVectorOp(Node); + + // If using a SETCC would result in a different type than the mask type, + // unroll. 
+ if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + EVLVecVT) != MaskVT) + return DAG.UnrollVectorOp(Node); + + SDValue StepVec = DAG.getStepVector(DL, EVLVecVT); + SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL) + : DAG.getSplatVector(EVLVecVT, DL, EVL); + SDValue EVLMask = + DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT); + + SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask); + return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2); +} + void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7ec2638b1e71..0bd44ce4c872 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -914,7 +914,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::VSELECT: - case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT: + case ISD::VP_MERGE: + case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; @@ -936,11 +938,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::VP_LOAD: + SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi); + break; case ISD::MLOAD: SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); break; case ISD::MGATHER: - SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi); + case ISD::VP_GATHER: + SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true); break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); @@ -1008,31 +1014,31 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ExtendOp(N, Lo, Hi); break; - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: + case ISD::ADD: case ISD::VP_ADD: + case ISD::SUB: case ISD::VP_SUB: + case ISD::MUL: case ISD::VP_MUL: case ISD::MULHS: case ISD::MULHU: - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: + case ISD::FADD: case ISD::VP_FADD: + case ISD::FSUB: case ISD::VP_FSUB: + case ISD::FMUL: case ISD::VP_FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: - case ISD::SDIV: - case ISD::UDIV: - case ISD::FDIV: + case ISD::SDIV: case ISD::VP_SDIV: + case ISD::UDIV: case ISD::VP_UDIV: + case ISD::FDIV: case ISD::VP_FDIV: case ISD::FPOW: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::UREM: - case ISD::SREM: - case ISD::FREM: + case ISD::AND: case ISD::VP_AND: + case ISD::OR: case ISD::VP_OR: + case ISD::XOR: case ISD::VP_XOR: + case ISD::SHL: case ISD::VP_SHL: + case ISD::SRA: case ISD::VP_ASHR: + case ISD::SRL: case ISD::VP_LSHR: + case ISD::UREM: case ISD::VP_UREM: + case ISD::SREM: case ISD::VP_SREM: + case ISD::FREM: case ISD::VP_FREM: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -1045,7 +1051,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: - SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ 
false); + SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: case ISD::FSHL: @@ -1082,26 +1088,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIVFIXSAT: SplitVecRes_FIX(N, Lo, Hi); break; - case ISD::VP_ADD: - case ISD::VP_AND: - case ISD::VP_MUL: - case ISD::VP_OR: - case ISD::VP_SUB: - case ISD::VP_XOR: - case ISD::VP_SHL: - case ISD::VP_LSHR: - case ISD::VP_ASHR: - case ISD::VP_SDIV: - case ISD::VP_UDIV: - case ISD::VP_SREM: - case ISD::VP_UREM: - case ISD::VP_FADD: - case ISD::VP_FSUB: - case ISD::VP_FMUL: - case ISD::VP_FDIV: - case ISD::VP_FREM: - SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true); - break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1133,8 +1119,22 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, } } -void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, - bool IsVP) { +std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) { + return SplitMask(Mask, SDLoc(Mask)); +} + +std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask, + const SDLoc &DL) { + SDValue MaskLo, MaskHi; + EVT MaskVT = Mask.getValueType(); + if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + return std::make_pair(MaskLo, MaskHi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1143,36 +1143,21 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); - if (!IsVP) { + if (N->getNumOperands() == 2) { Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); return; } - // Split the mask. + assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue MaskLo, MaskHi; - SDValue Mask = N->getOperand(2); - EVT MaskVT = Mask.getValueType(); - if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask)); - - // Split the vector length parameter. - // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts). - SDValue EVL = N->getOperand(3); - EVT VecVT = N->getValueType(0); - EVT EVLVT = EVL.getValueType(); - assert(VecVT.getVectorElementCount().isKnownEven() && - "Expecting the mask to be an evenly-sized vector"); - unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; - SDValue HalfNumElts = - VecVT.isFixedLengthVector() - ? 
DAG.getConstant(HalfMinNumElts, dl, EVLVT) - : DAG.getVScale(dl, EVLVT, - APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts)); - SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts); - SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts); + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl); Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), {LHSLo, RHSLo, MaskLo, EVLLo}, Flags); @@ -1781,6 +1766,86 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(LD->isUnindexed() && "Indexed VP load during type legalization!"); + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed variable-length load offset"); + Align Alignment = LD->getOriginalAlign(); + SDValue Mask = LD->getMask(); + SDValue EVL = LD->getVectorLength(); + EVT MemoryVT = LD->getMemoryVT(); + + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } + + // Split EVL operand + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + LD->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, LD->getAAInfo(), LD->getRanges()); + + Lo = + DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset, + MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad()); + + if (HiIsEmpty) { + // The hi vp_load has zero storage size. We therefore simply set it to + // the low vp_load and rely on subsequent removal from the chain. + Hi = Lo; + } else { + // Generate hi vp_load. + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, + LD->isExpandingLoad()); + + MachinePointerInfo MPI; + if (LoMemVT.isScalableVector()) + MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace()); + else + MPI = LD->getPointerInfo().getWithOffset( + LoMemVT.getStoreSize().getFixedSize()); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, + LD->getAAInfo(), LD->getRanges()); + + Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr, + Offset, MaskHi, EVLHi, HiMemVT, MMO, + LD->isExpandingLoad()); + } + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), Ch); +} + void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); @@ -1865,61 +1930,85 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, } -void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, - SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo, + SDValue &Hi, bool SplitSETCC) { EVT LoVT, HiVT; - SDLoc dl(MGT); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); - - SDValue Ch = MGT->getChain(); - SDValue Ptr = MGT->getBasePtr(); - SDValue Mask = MGT->getMask(); - SDValue PassThru = MGT->getPassThru(); - SDValue Index = MGT->getIndex(); - SDValue Scale = MGT->getScale(); - EVT MemoryVT = MGT->getMemoryVT(); - Align Alignment = MGT->getOriginalAlign(); - ISD::LoadExtType ExtType = MGT->getExtensionType(); + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + struct Operands { + SDValue Mask; + SDValue Index; + SDValue Scale; + } Ops = [&]() -> Operands { + if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) { + return {MSC->getMask(), MSC->getIndex(), MSC->getScale()}; + } + auto *VPSC = cast<VPGatherSDNode>(N); + return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()}; + }(); + + EVT MemoryVT = N->getMemoryVT(); + Align Alignment = N->getOriginalAlign(); // Split Mask operand SDValue MaskLo, MaskHi; - if (Mask.getOpcode() == ISD::SETCC) { - SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi); } else { - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl); } EVT LoMemVT, HiMemVT; // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue PassThruLo, PassThruHi; - if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(PassThru, PassThruLo, PassThruHi); - else - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - SDValue IndexHi, IndexLo; - if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Index, IndexLo, IndexHi); + if (getTypeAction(Ops.Index.getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(Ops.Index, IndexLo, IndexHi); else - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MGT->getPointerInfo(), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(), - MGT->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); + + if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) { + SDValue PassThru = MGT->getPassThru(); + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); + else + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; - Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, 
MVT::Other), LoMemVT, dl, OpsLo, - MMO, MGT->getIndexType(), ExtType); + ISD::LoadExtType ExtType = MGT->getExtensionType(); + ISD::MemIndexType IndexTy = MGT->getIndexType(); - SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; - Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi, - MMO, MGT->getIndexType(), ExtType); + SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale}; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, + OpsLo, MMO, IndexTy, ExtType); + + SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, + OpsHi, MMO, IndexTy, ExtType); + } else { + auto *VPGT = cast<VPGatherSDNode>(N); + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl); + + SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo}; + Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo, + MMO, VPGT->getIndexType()); + + SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi}; + Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi, + MMO, VPGT->getIndexType()); + } // Build a factor node to remember that this load is independent of the // other one. @@ -1928,10 +2017,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Legalize the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(MGT, 1), Ch); + ReplaceValueWith(SDValue(N, 1), Ch); } - void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -2221,14 +2309,19 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::VP_STORE: + Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo); + break; case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); break; case ISD::MSCATTER: - Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + case ISD::VP_SCATTER: + Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo); break; case ISD::MGATHER: - Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + case ISD::VP_GATHER: + Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo); break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); @@ -2285,6 +2378,23 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_SEQ_FMUL: Res = SplitVecOp_VECREDUCE_SEQ(N); break; + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_SEQ_FADD: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_SEQ_FMUL: + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + case ISD::VP_REDUCE_FMAX: + case ISD::VP_REDUCE_FMIN: + Res = SplitVecOp_VP_REDUCE(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. 
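The VP_REDUCE_* cases added above route into SplitVecOp_VP_REDUCE, defined in the next hunk. Its key property is that the partial result of the low half becomes the start value of the high-half reduction, so the chained pair is equivalent to one full-length reduction. A hedged scalar model of that chaining (ints and std::vector instead of SDNodes, with add as the representative reduction; masks are assumed at least as long as their vectors):

    // Hypothetical scalar model; not LLVM source.
    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Masked, length-limited add reduction: only lanes below EVL whose mask
    // bit is set participate.
    uint64_t vpReduceAdd(uint64_t Start, const std::vector<uint64_t> &V,
                         const std::vector<bool> &Mask, size_t EVL) {
      uint64_t Acc = Start;
      for (size_t I = 0, E = std::min(EVL, V.size()); I != E; ++I)
        if (Mask[I])
          Acc += V[I];
      return Acc;
    }

    // Split form: the low-half partial result feeds the high half as its
    // start value, matching how SplitVecOp_VP_REDUCE chains the two nodes.
    uint64_t splitVPReduceAdd(uint64_t Start, const std::vector<uint64_t> &Lo,
                              const std::vector<uint64_t> &Hi,
                              const std::vector<bool> &MaskLo,
                              const std::vector<bool> &MaskHi,
                              size_t EVLLo, size_t EVLHi) {
      uint64_t Partial = vpReduceAdd(Start, Lo, MaskLo, EVLLo);
      return vpReduceAdd(Partial, Hi, MaskHi, EVLHi);
    }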
@@ -2381,6 +2491,33 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags); } +SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) { + assert(N->isVPOpcode() && "Expected VP opcode"); + assert(OpNo == 1 && "Can only split reduce vector operand"); + + unsigned Opc = N->getOpcode(); + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + + SDValue VecOp = N->getOperand(OpNo); + EVT VecVT = VecOp.getValueType(); + assert(VecVT.isVector() && "Can only split reduce vector operand"); + GetSplitVector(VecOp, Lo, Hi); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl); + + const SDNodeFlags Flags = N->getFlags(); + + SDValue ResLo = + DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags); + return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags); +} + SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); @@ -2558,70 +2695,92 @@ SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi); } -SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, - unsigned OpNo) { - EVT LoVT, HiVT; - SDLoc dl(MGT); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); - - SDValue Ch = MGT->getChain(); - SDValue Ptr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue Scale = MGT->getScale(); - SDValue Mask = MGT->getMask(); - SDValue PassThru = MGT->getPassThru(); - Align Alignment = MGT->getOriginalAlign(); - ISD::LoadExtType ExtType = MGT->getExtensionType(); +SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) { + (void)OpNo; + SDValue Lo, Hi; + SplitVecRes_Gather(N, Lo, Hi); - SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); +} - EVT MemoryVT = MGT->getMemoryVT(); - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); +SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed vp_store of vector?"); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected VP store offset"); + SDValue Mask = N->getMask(); + SDValue EVL = N->getVectorLength(); + SDValue Data = N->getValue(); + Align Alignment = N->getOriginalAlign(); + SDLoc DL(N); - SDValue PassThruLo, PassThruHi; - if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(PassThru, PassThruLo, PassThruHi); + SDValue DataLo, DataHi; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); else - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - SDValue IndexHi, IndexLo; - if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Index, 
IndexLo, IndexHi); - else - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + // Split Mask operand + SDValue MaskLo, MaskHi; + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } + EVT MemoryVT = N->getMemoryVT(); + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); + + // Split EVL + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL); + + SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MGT->getPointerInfo(), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(), - MGT->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; - SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, - OpsLo, MMO, MGT->getIndexType(), ExtType); + Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); - SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; - SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, - OpsHi, MMO, MGT->getIndexType(), ExtType); + // If the hi vp_store has zero storage size, only the lo vp_store is needed. + if (HiIsEmpty) + return Lo; - // Build a factor node to remember that this load is independent of the - // other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + N->isCompressingStore()); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(MGT, 1), Ch); + MachinePointerInfo MPI; + if (LoMemVT.isScalableVector()) { + Alignment = commonAlignment(Alignment, + LoMemVT.getSizeInBits().getKnownMinSize() / 8); + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + } else + MPI = N->getPointerInfo().getWithOffset( + LoMemVT.getStoreSize().getFixedSize()); - SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, - Hi); - ReplaceValueWith(SDValue(MGT, 0), Res); - return SDValue(); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, + N->getAAInfo(), N->getRanges()); + + Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. 
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, @@ -2703,64 +2862,87 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, return Res; } -SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, - unsigned OpNo) { - SDValue Ch = N->getChain(); +SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) { + SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - SDValue Mask = N->getMask(); - SDValue Index = N->getIndex(); - SDValue Scale = N->getScale(); - SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); Align Alignment = N->getOriginalAlign(); SDLoc DL(N); - + struct Operands { + SDValue Mask; + SDValue Index; + SDValue Scale; + SDValue Data; + } Ops = [&]() -> Operands { + if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) { + return {MSC->getMask(), MSC->getIndex(), MSC->getScale(), + MSC->getValue()}; + } + auto *VPSC = cast<VPScatterSDNode>(N); + return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(), + VPSC->getValue()}; + }(); // Split all operands EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector) // Split Data operand - GetSplitVector(Data, DataLo, DataHi); + GetSplitVector(Ops.Data, DataLo, DataHi); else - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL); // Split Mask operand SDValue MaskLo, MaskHi; - if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { - SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi); } else { - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL); } SDValue IndexHi, IndexLo; - if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Index, IndexLo, IndexHi); + if (getTypeAction(Ops.Index.getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(Ops.Index, IndexLo, IndexHi); else - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL); SDValue Lo; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo(), MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; - Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, - DL, OpsLo, MMO, N->getIndexType(), - N->isTruncatingStore()); + if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) { + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale}; + Lo = + DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO, + MSC->getIndexType(), MSC->isTruncatingStore()); + + // The order of the Scatter operation after split is well defined. The "Hi" + // part comes after the "Lo". So these two operations should be chained one + // after another. 
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, + MMO, MSC->getIndexType(), + MSC->isTruncatingStore()); + } + auto *VPSC = cast<VPScatterSDNode>(N); + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL); + + SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo}; + Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO, + VPSC->getIndexType()); // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. - SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, - DL, OpsHi, MMO, N->getIndexType(), - N->isTruncatingStore()); + SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi}; + return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO, + VPSC->getIndexType()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -3047,31 +3229,41 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::VSELECT: - case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT: + case ISD::VP_SELECT: + case ISD::VP_MERGE: + Res = WidenVecRes_Select(N); + break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::VP_LOAD: + Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N)); + break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; case ISD::MGATHER: Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N)); break; + case ISD::VP_GATHER: + Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N)); + break; - case ISD::ADD: - case ISD::AND: - case ISD::MUL: + case ISD::ADD: case ISD::VP_ADD: + case ISD::AND: case ISD::VP_AND: + case ISD::MUL: case ISD::VP_MUL: case ISD::MULHS: case ISD::MULHU: - case ISD::OR: - case ISD::SUB: - case ISD::XOR: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: + case ISD::OR: case ISD::VP_OR: + case ISD::SUB: case ISD::VP_SUB: + case ISD::XOR: case ISD::VP_XOR: + case ISD::SHL: case ISD::VP_SHL: + case ISD::SRA: case ISD::VP_ASHR: + case ISD::SRL: case ISD::VP_LSHR: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINIMUM: @@ -3088,7 +3280,21 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: - Res = WidenVecRes_Binary(N, /*IsVP*/ false); + // Vector-predicated binary op widening. Note that -- unlike the + // unpredicated versions -- we don't have to worry about trapping on + // operations like UDIV, FADD, etc., as we pass on the original vector + // length parameter. This means the widened elements containing garbage + // aren't active. 
+ case ISD::VP_SDIV: + case ISD::VP_UDIV: + case ISD::VP_SREM: + case ISD::VP_UREM: + case ISD::VP_FADD: + case ISD::VP_FSUB: + case ISD::VP_FMUL: + case ISD::VP_FDIV: + case ISD::VP_FREM: + Res = WidenVecRes_Binary(N); break; case ISD::FADD: @@ -3212,31 +3418,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res = WidenVecRes_Ternary(N); break; - case ISD::VP_ADD: - case ISD::VP_AND: - case ISD::VP_MUL: - case ISD::VP_OR: - case ISD::VP_SUB: - case ISD::VP_XOR: - case ISD::VP_SHL: - case ISD::VP_LSHR: - case ISD::VP_ASHR: - case ISD::VP_SDIV: - case ISD::VP_UDIV: - case ISD::VP_SREM: - case ISD::VP_UREM: - case ISD::VP_FADD: - case ISD::VP_FSUB: - case ISD::VP_FMUL: - case ISD::VP_FDIV: - case ISD::VP_FREM: - // Vector-predicated binary op widening. Note that -- unlike the - // unpredicated versions -- we don't have to worry about trapping on - // operations like UDIV, FADD, etc., as we pass on the original vector - // length parameter. This means the widened elements containing garbage - // aren't active. - Res = WidenVecRes_Binary(N, /*IsVP*/ true); - break; } // If Res is null, the sub-method took care of registering the result. @@ -3254,29 +3435,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); } -SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) { +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - if (!IsVP) + if (N->getNumOperands() == 2) return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); - // For VP operations, we must also widen the mask. Note that the mask type - // may not actually need widening, leading it be split along with the VP - // operation. - // FIXME: This could lead to an infinite split/widen loop. We only handle the - // case where the mask needs widening to an identically-sized type as the - // vector inputs. 
- SDValue Mask = N->getOperand(2); - assert(getTypeAction(Mask.getValueType()) == - TargetLowering::TypeWidenVector && - "Unable to widen binary VP op"); - Mask = GetWidenedVector(Mask); - assert(Mask.getValueType().getVectorElementCount() == - WidenVT.getVectorElementCount() && - "Unable to widen binary VP op"); + + assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue Mask = + GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount()); return DAG.getNode(N->getOpcode(), dl, WidenVT, {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); } @@ -4226,6 +4399,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { report_fatal_error("Unable to widen vector load"); } +SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue EVL = N->getVectorLength(); + ISD::LoadExtType ExtType = N->getExtensionType(); + SDLoc dl(N); + + // The mask should be widened as well + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen binary VP op"); + Mask = GetWidenedVector(Mask); + assert(Mask.getValueType().getVectorElementCount() == + TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType()) + .getVectorElementCount() && + "Unable to widen vector load"); + + SDValue Res = + DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(), + N->getBasePtr(), N->getOffset(), Mask, EVL, + N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); @@ -4289,6 +4489,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) { + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue Scale = N->getScale(); + ElementCount WideEC = WideVT.getVectorElementCount(); + SDLoc dl(N); + + SDValue Index = GetWidenedVector(N->getIndex()); + EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(), + N->getMemoryVT().getScalarType(), WideEC); + Mask = GetWidenedMask(Mask, WideEC); + + SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale, + Mask, N->getVectorLength()}; + SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT, + dl, Ops, N->getMemOperand(), N->getIndexType()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0)); @@ -4522,19 +4745,19 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) { return Mask; } -SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ElementCount WidenEC = WidenVT.getVectorElementCount(); SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); + unsigned Opcode = N->getOpcode(); if (CondVT.isVector()) { if (SDValue WideCond = WidenVSELECTMask(N)) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(N->getOpcode(), SDLoc(N), - WidenVT, WideCond, InOp1, InOp2); + return DAG.getNode(Opcode, SDLoc(N), WidenVT, WideCond, InOp1, InOp2); } EVT CondEltVT = CondVT.getVectorElementType(); @@ -4560,8 +4783,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(N->getOpcode(), SDLoc(N), - WidenVT, Cond1, InOp1, InOp2); + return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE + ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, + N->getOperand(3)) + : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { @@ -4711,9 +4936,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; + case ISD::VP_SCATTER: Res = WidenVecOp_VP_SCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; @@ -4766,6 +4993,23 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_SEQ_FMUL: Res = WidenVecOp_VECREDUCE_SEQ(N); break; + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_SEQ_FADD: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_SEQ_FMUL: + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + case ISD::VP_REDUCE_FMAX: + case ISD::VP_REDUCE_FMIN: + Res = WidenVecOp_VP_REDUCE(N); + break; } // If Res is null, the sub-method took care of registering the result. 
@@ -5092,15 +5336,54 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { unsigned NumVTElts = StVT.getVectorMinNumElements(); SDValue EVL = DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); - const auto *MMO = ST->getMemOperand(); - return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask, - EVL, MMO->getPointerInfo(), MMO->getAlign(), - MMO->getFlags(), MMO->getAAInfo()); + return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), + DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask, + EVL, StVal.getValueType(), ST->getMemOperand(), + ST->getAddressingMode()); } report_fatal_error("Unable to widen vector store"); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { + assert((OpNo == 1 || OpNo == 3) && + "Can widen only data or mask operand of vp_store"); + VPStoreSDNode *ST = cast<VPStoreSDNode>(N); + SDValue Mask = ST->getMask(); + SDValue StVal = ST->getValue(); + SDLoc dl(N); + + if (OpNo == 1) { + // Widen the value. + StVal = GetWidenedVector(StVal); + + // We only handle the case where the mask needs widening to an + // identically-sized type as the vector inputs. + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP store"); + Mask = GetWidenedVector(Mask); + } else { + Mask = GetWidenedVector(Mask); + + // We only handle the case where the stored value needs widening to an + // identically-sized type as the mask. + assert(getTypeAction(StVal.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP store"); + StVal = GetWidenedVector(StVal); + } + + assert(Mask.getValueType().getVectorElementCount() == + StVal.getValueType().getVectorElementCount() && + "Mask and data vectors should have the same number of elements"); + return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(), + ST->getOffset(), Mask, ST->getVectorLength(), + ST->getMemoryVT(), ST->getMemOperand(), + ST->getAddressingMode(), ST->isTruncatingStore(), + ST->isCompressingStore()); +} + SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of mstore"); @@ -5202,6 +5485,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { MSC->isTruncatingStore()); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) { + VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N); + SDValue DataOp = VPSC->getValue(); + SDValue Mask = VPSC->getMask(); + SDValue Index = VPSC->getIndex(); + SDValue Scale = VPSC->getScale(); + EVT WideMemVT = VPSC->getMemoryVT(); + + if (OpNo == 1) { + DataOp = GetWidenedVector(DataOp); + Index = GetWidenedVector(Index); + const auto WideEC = DataOp.getValueType().getVectorElementCount(); + Mask = GetWidenedMask(Mask, WideEC); + WideMemVT = EVT::getVectorVT(*DAG.getContext(), + VPSC->getMemoryVT().getScalarType(), WideEC); + } else if (OpNo == 4) { + // Just widen the index. It's allowed to have extra elements. 
+ Index = GetWidenedVector(Index); + } else + llvm_unreachable("Can't widen this operand of mscatter"); + + SDValue Ops[] = { + VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask, + VPSC->getVectorLength()}; + return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops, + VPSC->getMemOperand(), VPSC->getIndexType()); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -5320,6 +5631,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) { + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDLoc dl(N); + SDValue Op = GetWidenedVector(N->getOperand(1)); + SDValue Mask = GetWidenedMask(N->getOperand(2), + Op.getValueType().getVectorElementCount()); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), + {N->getOperand(0), Op, Mask, N->getOperand(3)}, + N->getFlags()); +} + SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { // This only gets called in the case that the left and right inputs and // result are of a legal odd vector type, and the condition is illegal i1 of @@ -5779,6 +6103,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); + assert(!InVT.isScalableVector() && !NVT.isScalableVector() && + "cannot modify scalable vectors in this way"); SDLoc dl(InOp); // Check if InOp already has the right width. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index aec2cf38b400..403f34573899 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -286,7 +286,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - SDValue InGlue = SDValue(nullptr, 0); + SDValue InGlue; if (AddGlue(Lead, InGlue, true, DAG)) InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { @@ -1057,12 +1057,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { "first terminator cannot be a debug value"); for (MachineInstr &MI : make_early_inc_range( make_range(std::next(FirstTerm), InsertBB->end()))) { + // Only scan up to insertion point. + if (&MI == InsertPos) + break; + if (!MI.isDebugValue()) continue; - if (&MI == InsertPos) - InsertPos = std::prev(InsertPos->getIterator()); - // The DBG_VALUE was referencing a value produced by a terminator. By // moving the DBG_VALUE, the referenced value also needs invalidating. 
MI.getOperand(0).ChangeToRegister(0, false); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2ae0d4df7b77..45f3005e8f57 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -373,31 +373,46 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { llvm_unreachable("Expected VECREDUCE opcode"); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_SEQ_FADD: + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_SEQ_FADD: return ISD::FADD; case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_SEQ_FMUL: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_SEQ_FMUL: return ISD::FMUL; case ISD::VECREDUCE_ADD: + case ISD::VP_REDUCE_ADD: return ISD::ADD; case ISD::VECREDUCE_MUL: + case ISD::VP_REDUCE_MUL: return ISD::MUL; case ISD::VECREDUCE_AND: + case ISD::VP_REDUCE_AND: return ISD::AND; case ISD::VECREDUCE_OR: + case ISD::VP_REDUCE_OR: return ISD::OR; case ISD::VECREDUCE_XOR: + case ISD::VP_REDUCE_XOR: return ISD::XOR; case ISD::VECREDUCE_SMAX: + case ISD::VP_REDUCE_SMAX: return ISD::SMAX; case ISD::VECREDUCE_SMIN: + case ISD::VP_REDUCE_SMIN: return ISD::SMIN; case ISD::VECREDUCE_UMAX: + case ISD::VP_REDUCE_UMAX: return ISD::UMAX; case ISD::VECREDUCE_UMIN: + case ISD::VP_REDUCE_UMIN: return ISD::UMIN; case ISD::VECREDUCE_FMAX: + case ISD::VP_REDUCE_FMAX: return ISD::FMAXNUM; case ISD::VECREDUCE_FMIN: + case ISD::VP_REDUCE_FMIN: return ISD::FMINNUM; } } @@ -3066,7 +3081,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::MUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = KnownBits::mul(Known, Known2); + bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); + Known = KnownBits::mul(Known, Known2, SelfMultiply); break; } case ISD::MULHU: { @@ -3085,8 +3101,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); if (Op.getResNo() == 0) - Known = KnownBits::mul(Known, Known2); + Known = KnownBits::mul(Known, Known2, SelfMultiply); else Known = KnownBits::mulhu(Known, Known2); break; @@ -3095,8 +3112,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); if (Op.getResNo() == 0) - Known = KnownBits::mul(Known, Known2); + Known = KnownBits::mul(Known, Known2, SelfMultiply); else Known = KnownBits::mulhs(Known, Known2); break; @@ -3363,6 +3381,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::AssertAlign: { unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign()); assert(LogOfAlign != 0); + + // TODO: Should use maximum with source // If a node is guaranteed to be aligned, set low zero bits accordingly as // well as clearing one bits. 
Known.Zero.setLowBits(LogOfAlign); @@ -3584,6 +3604,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::smin(Known, Known2); break; } + case ISD::FP_TO_UINT_SAT: { + // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT. + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits()); + break; + } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: if (Op.getResNo() == 1) { // The boolean result conforms to getBooleanContents. @@ -3860,6 +3886,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; } + case ISD::FP_TO_SINT_SAT: + // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT. + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); + return VTBits - Tmp + 1; case ISD::SIGN_EXTEND: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; @@ -4252,7 +4282,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // scalar cases. Type *CstTy = Cst->getType(); if (CstTy->isVectorTy() && - (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) { + (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() && + VTBits == CstTy->getScalarSizeInBits()) { Tmp = VTBits; for (unsigned i = 0; i != NumElts; ++i) { if (!DemandedElts[i]) @@ -4294,31 +4325,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); - - APInt Mask; - if (Known.isNonNegative()) { // sign bit is 0 - Mask = Known.Zero; - } else if (Known.isNegative()) { // sign bit is 1; - Mask = Known.One; - } else { - // Nothing known. - return FirstAnswer; - } - - // Okay, we know that the sign bit in Mask is set. Use CLO to determine - // the number of identical bits in the top of the input value. - Mask <<= Mask.getBitWidth()-VTBits; - return std::max(FirstAnswer, Mask.countLeadingOnes()); + return std::max(FirstAnswer, Known.countMinSignBits()); } -unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { +unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, + unsigned Depth) const { unsigned SignBits = ComputeNumSignBits(Op, Depth); return Op.getScalarValueSizeInBits() - SignBits + 1; } -unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, - const APInt &DemandedElts, - unsigned Depth) const { +unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); return Op.getScalarValueSizeInBits() - SignBits + 1; } @@ -5102,6 +5120,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "BSWAP types must be a multiple of 16 bits!"); if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); + // bswap(bswap(X)) -> X. + if (OpOpcode == ISD::BSWAP) + return Operand.getOperand(0); break; case ISD::BITREVERSE: assert(VT.isInteger() && VT == Operand.getValueType() && @@ -5398,6 +5419,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } } + // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). 
+ // (shl step_vector(C0), C1) -> (step_vector(C0 << C1)) + if ((Opcode == ISD::MUL || Opcode == ISD::SHL) && + Ops[0].getOpcode() == ISD::STEP_VECTOR) { + APInt RHSVal; + if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) { + APInt NewStep = Opcode == ISD::MUL + ? Ops[0].getConstantOperandAPInt(0) * RHSVal + : Ops[0].getConstantOperandAPInt(0) << RHSVal; + return getStepVector(DL, VT, NewStep); + } + } + auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) { return !Op.getValueType().isVector() || Op.getValueType().getVectorElementCount() == NumElts; @@ -5595,22 +5629,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getOpcode() != ISD::DELETED_NODE && N2.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); - // Canonicalize constant to RHS if commutative. if (TLI->isCommutativeBinOp(Opcode)) { - if (N1C && !N2C) { - std::swap(N1C, N2C); + bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); + bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); + bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) std::swap(N1, N2); - } else if (N1CFP && !N2CFP) { - std::swap(N1CFP, N2CFP); - std::swap(N1, N2); - } } + auto *N1C = dyn_cast<ConstantSDNode>(N1); + auto *N2C = dyn_cast<ConstantSDNode>(N2); + + // Don't allow undefs in vector splats - we might be returning N2 when folding + // to zero etc. + ConstantSDNode *N2CV = + isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true); + switch (Opcode) { default: break; case ISD::TokenFactor: @@ -5640,9 +5676,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's // worth handling here. - if (N2C && N2C->isZero()) + if (N2CV && N2CV->isZero()) return N2; - if (N2C && N2C->isAllOnes()) // X & -1 -> X + if (N2CV && N2CV->isAllOnes()) // X & -1 -> X return N1; break; case ISD::OR: @@ -5654,7 +5690,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so // it's worth handling here. - if (N2C && N2C->isZero()) + if (N2CV && N2CV->isZero()) return N1; if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() && VT.getVectorElementType() == MVT::i1) @@ -5760,7 +5796,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // size of the value, the shift/rotate count is guaranteed to be zero. 
if (VT == MVT::i1) return N1; - if (N2C && N2C->isZero()) + if (N2CV && N2CV->isZero()) return N1; break; case ISD::FP_ROUND: @@ -6358,7 +6394,7 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) return DAG.getConstant(Val, dl, VT); - return SDValue(nullptr, 0); + return SDValue(); } SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, @@ -7697,23 +7733,6 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { - if (VT == MemVT) { - ExtType = ISD::NON_EXTLOAD; - } else if (ExtType == ISD::NON_EXTLOAD) { - assert(VT == MemVT && "Non-extending load from different memory type!"); - } else { - // Extending load. - assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && - "Should only be an extending load, not truncating!"); - assert(VT.isInteger() == MemVT.isInteger() && - "Cannot convert from FP to Int or Int -> FP!"); - assert(VT.isVector() == MemVT.isVector() && - "Cannot use an ext load to convert to or from a vector!"); - assert((!VT.isVector() || - VT.getVectorElementCount() == MemVT.getVectorElementCount()) && - "Cannot use an ext load to change the number of vector elements!"); - } - bool Indexed = AM != ISD::UNINDEXED; assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); @@ -7802,48 +7821,29 @@ SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, } SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, - SDValue Ptr, SDValue Mask, SDValue EVL, - MachinePointerInfo PtrInfo, Align Alignment, - MachineMemOperand::Flags MMOFlags, - const AAMDNodes &AAInfo, bool IsCompressing) { + SDValue Ptr, SDValue Offset, SDValue Mask, + SDValue EVL, EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - - MMOFlags |= MachineMemOperand::MOStore; - assert((MMOFlags & MachineMemOperand::MOLoad) == 0); - - if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); - - MachineFunction &MF = getMachineFunction(); - uint64_t Size = - MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); - return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); -} - -SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, - SDValue Ptr, SDValue Mask, SDValue EVL, - MachineMemOperand *MMO, bool IsCompressing) { - assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - EVT VT = Val.getValueType(); - SDVTList VTs = getVTList(MVT::Other); - SDValue Undef = getUNDEF(Ptr.getValueType()); - SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(Ptr.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( - dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<VPStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = - newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ISD::UNINDEXED, false, IsCompressing, VT, MMO); + auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7885,7 +7885,9 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (VT == SVT) - return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); + return getStoreVP(Chain, dl, Val, Ptr, getUNDEF(Ptr.getValueType()), Mask, + EVL, VT, MMO, ISD::UNINDEXED, + /*IsTruncating*/ false, IsCompressing); assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && "Should only be a truncating store, not extending!"); @@ -10661,6 +10663,23 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, return std::make_pair(Lo, Hi); } +std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT, + const SDLoc &DL) { + // Split the vector length parameter. + // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts). + EVT VT = N.getValueType(); + assert(VecVT.getVectorElementCount().isKnownEven() && + "Expecting the mask to be an evenly-sized vector"); + unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; + SDValue HalfNumElts = + VecVT.isFixedLengthVector() + ? getConstant(HalfMinNumElts, DL, VT) + : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts)); + SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts); + SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts); + return std::make_pair(Lo, Hi); +} + /// Widen the vector up to the next power of two using INSERT_SUBVECTOR. 
SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { EVT VT = N.getValueType(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 63cd723cf6da..41460f78e1c2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1683,6 +1683,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) { return DAG.getMDNode(cast<MDNode>(MD->getMetadata())); } + if (const auto *BB = dyn_cast<BasicBlock>(V)) + return DAG.getBasicBlock(FuncInfo.MBBMap[BB]); llvm_unreachable("Can't get register for value!"); } @@ -4846,10 +4848,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } if (!I.getType()->isVoidTy()) { - if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); - Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); - } else + if (!isa<VectorType>(I.getType())) Result = lowerRangeToAssertZExt(DAG, I, Result); MaybeAlign Alignment = I.getRetAlign(); @@ -7327,8 +7326,6 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); SDValue LD; @@ -7336,6 +7333,8 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, if (!IsGather) { // Do not serialize variable-length loads of constant memory with // anything. + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); @@ -7345,6 +7344,8 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO, false /*IsExpanding */); } else { + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( @@ -7385,18 +7386,22 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); SDValue ST; if (!IsScatter) { + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + SDValue Ptr = OpValues[1]; + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, MemoryLocation::UnknownSize, *Alignment, AAInfo); - ST = - DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1], - OpValues[2], OpValues[3], MMO, false /* IsTruncating */); + ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, + OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, + /* IsTruncating */ false, /*IsCompressing*/ false); } else { + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( @@ -8250,7 +8255,8 @@ public: /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL) const { + const DataLayout &DL, + llvm::Type *ParamElemType) const { if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) @@ -8262,10 +8268,8 @@ public: // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { - PointerType *PtrTy = dyn_cast<PointerType>(OpTy); - if (!PtrTy) - report_fatal_error("Indirect operand for inline asm not a pointer!"); - OpTy = PtrTy->getElementType(); + OpTy = ParamElemType; + assert(OpTy && "Indirect opernad must have elementtype attribute"); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. @@ -8559,37 +8563,19 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - unsigned NumMatchingOps = 0; for (auto &T : TargetConstraints) { ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); // Compute the value type for each operand. - if (OpInfo.Type == InlineAsm::isInput || - (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { - OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); - - // Process the call argument. BasicBlocks are labels, currently appearing - // only in asm's. 
- if (isa<CallBrInst>(Call) && - ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() - - cast<CallBrInst>(&Call)->getNumIndirectDests() - - NumMatchingOps) && - (NumMatchingOps == 0 || - ArgNo - 1 < - (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) { - const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); - EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); - OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); - } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { - OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); - } else { - OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - } - + if (OpInfo.hasArg()) { + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); + Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo); EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, - DAG.getDataLayout()); + DAG.getDataLayout(), ParamElemTy); OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; + ArgNo++; } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. @@ -8607,9 +8593,6 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.ConstraintVT = MVT::Other; } - if (OpInfo.hasMatchingInput()) - ++NumMatchingOps; - if (!HasSideEffect) HasSideEffect = OpInfo.hasMemory(TLI); @@ -11246,12 +11229,6 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) { unsigned NumElts = VT.getVectorNumElements(); - if ((-Imm > NumElts) || (Imm >= NumElts)) { - // Result is undefined if immediate is out-of-bounds. - setValue(&I, DAG.getUNDEF(VT)); - return; - } - uint64_t Idx = (NumElts + Imm) % NumElts; // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e6b06ab93d6b..a98c21f16c71 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -60,7 +60,7 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // Conservatively require the attributes of the call to match those of // the return. Ignore following attributes because they don't affect the // call sequence. - AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex); + AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs()); for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, Attribute::DereferenceableOrNull, Attribute::NoAlias, Attribute::NonNull}) @@ -1806,6 +1806,31 @@ bool TargetLowering::SimplifyDemandedBits( } case ISD::BSWAP: { SDValue Src = Op.getOperand(0); + + // If the only bits demanded come from one byte of the bswap result, + // just shift the input byte into position to eliminate the bswap. + unsigned NLZ = DemandedBits.countLeadingZeros(); + unsigned NTZ = DemandedBits.countTrailingZeros(); + + // Round NTZ down to the next byte. If we have 11 trailing zeros, then + // we need all the bits down to bit 8. Likewise, round NLZ. If we + // have 14 leading zeros, round to 8. + NLZ = alignDown(NLZ, 8); + NTZ = alignDown(NTZ, 8); + // If we need exactly one byte, we can do this transformation. 
+ if (BitWidth - NLZ - NTZ == 8) { + // Replace this with either a left or right shift to get the byte into + // the right place. + unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL; + if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) { + EVT ShiftAmtTy = getShiftAmountTy(VT, DL); + unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ; + SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy); + SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt); + return TLO.CombineTo(Op, NewOp); + } + } + APInt DemandedSrcBits = DemandedBits.byteSwap(); if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, Depth + 1)) @@ -1833,19 +1858,15 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { unsigned MinSignedBits = - TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); + TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1); bool AlreadySignExtended = ExVTBits >= MinSignedBits; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { // Compute the correct shift amount type, which must be getShiftAmountTy // for scalar types after legalization. - EVT ShiftAmtTy = VT; - if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); - - SDValue ShiftAmt = - TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, + getShiftAmountTy(VT, DL)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt)); } @@ -3233,17 +3254,29 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const { - // Match these patterns in any of their permutations: - // (X & Y) == Y - // (X & Y) != Y if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) std::swap(N0, N1); + SelectionDAG &DAG = DCI.DAG; EVT OpVT = N0.getValueType(); if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || (Cond != ISD::SETEQ && Cond != ISD::SETNE)) return SDValue(); + // (X & Y) != 0 --> zextOrTrunc(X & Y) + // iff everything but LSB is known zero: + if (Cond == ISD::SETNE && isNullConstant(N1) && + (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent || + getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) { + unsigned NumEltBits = OpVT.getScalarSizeInBits(); + APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1); + if (DAG.MaskedValueIsZero(N0, UpperBits)) + return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT); + } + + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y SDValue X, Y; if (N0.getOperand(0) == N1) { X = N0.getOperand(1); @@ -3255,7 +3288,6 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } - SelectionDAG &DAG = DCI.DAG; SDValue Zero = DAG.getConstant(0, DL, OpVT); if (DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. @@ -3678,9 +3710,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Figure out how many bits we need to preserve this constant. - unsigned ReqdBits = Signed ? - C1.getBitWidth() - C1.getNumSignBits() + 1 : - C1.getActiveBits(); + unsigned ReqdBits = Signed ? 
C1.getMinSignedBits() : C1.getActiveBits(); // Make sure we're not losing bits from the constant. if (MinBits > 0 && @@ -4594,20 +4624,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; - case 'X': // Allows any operand; labels (basic block) use this. - if (Op.getOpcode() == ISD::BasicBlock || - Op.getOpcode() == ISD::TargetBlockAddress) { - Ops.push_back(Op); - return; - } - LLVM_FALLTHROUGH; + case 'X': // Allows any operand case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 's': { // Relocatable Constant - GlobalAddressSDNode *GA; ConstantSDNode *C; - BlockAddressSDNode *BA; uint64_t Offset = 0; // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C), @@ -4615,13 +4637,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible // while in this case the GA may be furthest from the root node which is // likely an ISD::ADD. - while (1) { - if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') { - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), - GA->getValueType(0), - Offset + GA->getOffset())); - return; - } + while (true) { if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') { // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in @@ -4636,11 +4652,23 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64)); return; } - if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') { - Ops.push_back(DAG.getTargetBlockAddress( - BA->getBlockAddress(), BA->getValueType(0), - Offset + BA->getOffset(), BA->getTargetFlags())); - return; + if (ConstraintLetter != 'n') { + if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), + GA->getValueType(0), + Offset + GA->getOffset())); + return; + } + if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) { + Ops.push_back(DAG.getTargetBlockAddress( + BA->getBlockAddress(), BA->getValueType(0), + Offset + BA->getOffset(), BA->getTargetFlags())); + return; + } + if (isa<BasicBlockSDNode>(Op)) { + Ops.push_back(Op); + return; + } } const unsigned OpCode = Op.getOpcode(); if (OpCode == ISD::ADD || OpCode == ISD::SUB) { @@ -4753,7 +4781,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; } @@ -4771,7 +4799,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, ++ResNo; break; case InlineAsm::isInput: - OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; case InlineAsm::isClobber: // Nothing to do. 
@@ -4781,10 +4809,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, if (OpInfo.CallOperandVal) { llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); if (OpInfo.isIndirect) { - llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); - if (!PtrTy) - report_fatal_error("Indirect operand for inline asm not a pointer!"); - OpTy = PtrTy->getElementType(); + OpTy = Call.getAttributes().getParamElementType(ArgNo); + assert(OpTy && "Indirect opernad must have elementtype attribute"); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. @@ -4814,6 +4840,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } + + ArgNo++; } } @@ -5087,17 +5115,18 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, // 'X' matches anything. if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { - // Labels and constants are handled elsewhere ('X' is the only thing - // that matches labels). For Functions, the type here is the type of - // the result, which is not what we want to look at; leave them alone. + // Constants are handled elsewhere. For Functions, the type here is the + // type of the result, which is not what we want to look at; leave them + // alone. Value *v = OpInfo.CallOperandVal; - if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { - OpInfo.CallOperandVal = v; + if (isa<ConstantInt>(v) || isa<Function>(v)) { return; } - if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress) + if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) { + OpInfo.ConstraintCode = "i"; return; + } // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. @@ -6438,12 +6467,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, unsigned ShiftAmount = OuterBitSize - InnerBitSize; EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); - if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) { - // FIXME getShiftAmountTy does not always return a sensible result when VT - // is an illegal type, and so the type may be too small to fit the shift - // amount. Override it with i32. The shift will have to be legalized. - ShiftAmountTy = MVT::i32; - } SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); if (!LH.getNode() && !RH.getNode() && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp index f89069e9f728..f6ad2b50abcd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -273,6 +273,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); for (const MachineOperand &MO : MI.operands()) { bool UseOrDefCSR = false; if (MO.isReg()) { @@ -288,8 +290,14 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, // separately. An SP mentioned by a call instruction, we can ignore, // though, as it's harmless and we do not want to effectively disable tail // calls by forcing the restore point to post-dominate them. - UseOrDefCSR = (!MI.isCall() && PhysReg == SP) || - RCI.getLastCalleeSavedAlias(PhysReg); + // PPC's LR is also not normally described as a callee-saved register in + // calling convention definitions, so we need to watch for it, too. 
An LR + // mentioned implicitly by a return (or "branch to link register") + // instruction we can ignore, otherwise we may pessimize shrinkwrapping. + UseOrDefCSR = + (!MI.isCall() && PhysReg == SP) || + RCI.getLastCalleeSavedAlias(PhysReg) || + (!MI.isReturn() && TRI->isNonallocatableRegisterCalleeSave(PhysReg)); } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. for (unsigned Reg : getCurrentCSRs(RS)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index f0d342d26cc4..f69e50eaa0ca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -715,6 +715,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { SchedPreferenceInfo = Sched::ILP; GatherAllAliasesMaxDepth = 18; IsStrictFPEnabled = DisableStrictNodeMutation; + MaxBytesForAlignment = 0; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -2040,6 +2041,11 @@ Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { return PrefLoopAlignment; } +unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment( + MachineBasicBlock *MBB) const { + return MaxBytesForAlignment; +} + //===----------------------------------------------------------------------===// // Reciprocal Estimates //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d1c2cdeb133b..ce350034d073 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -108,8 +108,7 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, // ELF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() - : TargetLoweringObjectFile() { +TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() { SupportDSOLocalEquivalentLowering = true; } @@ -478,6 +477,11 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { return K; } +static bool hasPrefix(StringRef SectionName, StringRef Prefix) { + return SectionName.consume_front(Prefix) && + (SectionName.empty() || SectionName[0] == '.'); +} + static unsigned getELFSectionType(StringRef Name, SectionKind K) { // Use SHT_NOTE for section whose name starts with ".note" to allow // emitting ELF notes from C variable declaration. 
@@ -485,13 +489,13 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { if (Name.startswith(".note")) return ELF::SHT_NOTE; - if (Name == ".init_array") + if (hasPrefix(Name, ".init_array")) return ELF::SHT_INIT_ARRAY; - if (Name == ".fini_array") + if (hasPrefix(Name, ".fini_array")) return ELF::SHT_FINI_ARRAY; - if (Name == ".preinit_array") + if (hasPrefix(Name, ".preinit_array")) return ELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) @@ -1139,8 +1143,7 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// -TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() - : TargetLoweringObjectFile() { +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() { SupportIndirectSymViaGOTPCRel = true; } @@ -1185,6 +1188,7 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, StringRef SectionVal; GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal); + emitCGProfileMetadata(Streamer, M); // The section is mandatory. If we don't have it, then we don't have GC info. if (SectionVal.empty()) @@ -2543,8 +2547,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( //===----------------------------------------------------------------------===// // GOFF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() - : TargetLoweringObjectFile() {} +TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {} MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index 402e21d3708b..05004fb935df 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -328,7 +328,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, // Find the FSProfile file name. The internal option takes the precedence // before getting from TargetMachine. -static const std::string getFSProfileFile(const TargetMachine *TM) { +static std::string getFSProfileFile(const TargetMachine *TM) { if (!FSProfileFile.empty()) return FSProfileFile.getValue(); const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); @@ -339,7 +339,7 @@ static const std::string getFSProfileFile(const TargetMachine *TM) { // Find the Profile remapping file name. The internal option takes the // precedence before getting from TargetMachine. -static const std::string getFSRemappingFile(const TargetMachine *TM) { +static std::string getFSRemappingFile(const TargetMachine *TM) { if (!FSRemappingFile.empty()) return FSRemappingFile.getValue(); const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); @@ -1399,6 +1399,9 @@ bool TargetPassConfig::addRegAssignAndRewriteOptimized() { // Finally rewrite virtual registers. addPass(&VirtRegRewriterID); + // Regalloc scoring for ML-driven eviction - noop except when learning a new + // eviction policy. 
+ addPass(createRegAllocScoringPass()); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp index f5cb518fce3e..6bcf79547056 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -552,7 +552,7 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes( // Abort if we cannot possibly implement the COPY with the given indexes. if (BestIdx == 0) - return 0; + return false; NeededIndexes.push_back(BestIdx); @@ -581,7 +581,7 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes( } if (BestIdx == 0) - return 0; // Impossible to handle + return false; // Impossible to handle NeededIndexes.push_back(BestIdx); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index d042deefd746..01ea171e5ea2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -116,11 +116,11 @@ class IRPromoter { SmallPtrSet<Value*, 8> Promoted; void ReplaceAllUsersOfWith(Value *From, Value *To); - void ExtendSources(void); - void ConvertTruncs(void); - void PromoteTree(void); - void TruncateSinks(void); - void Cleanup(void); + void ExtendSources(); + void ConvertTruncs(); + void PromoteTree(); + void TruncateSinks(); + void Cleanup(); public: IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index cbc5d9ec169b..5f59cb4643f2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -293,7 +293,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { const std::vector<unsigned> &MaxPressure = DAG->getRegPressure().MaxSetPressure; - HighPressureSets.assign(MaxPressure.size(), 0); + HighPressureSets.assign(MaxPressure.size(), false); for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); HighPressureSets[i] = |
