Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h  28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp  53
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp  128
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp  48
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp  42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  261
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp  25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp  59
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp  862
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp  42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h  119
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp  443
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h  507
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  354
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  221
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h  61
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  89
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  826
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  197
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  75
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  131
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp  2
86 files changed, 3494 insertions(+), 1632 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5c64622c7245..bb71d72256d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -120,8 +120,7 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) {
AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
MachineFunction &MFi, const RegisterClassInfo &RCI,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
- : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
- TII(MF.getSubtarget().getInstrInfo()),
+ : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()),
TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 7e68e5e22879..e8fef505e43d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -577,9 +577,9 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
ADS = true;
- AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
- AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
- AttributeList::ReturnIndex);
+ AttrBuilder CallerAttrs(F->getContext(), F->getAttributes().getRetAttrs());
+ AttrBuilder CalleeAttrs(F->getContext(),
+ cast<CallInst>(I)->getAttributes().getRetAttrs());
// Following attributes are completely benign as far as calling convention
// goes, they shouldn't affect whether the call is a tail call.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 964cef75d164..03e63321e3c4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -23,6 +23,8 @@ namespace llvm {
AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
+void AIXException::markFunctionEnd() { endFragment(); }
+
void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
const MCSymbol *PerSym) {
// Generate EH Info Table.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 533f20535655..4f3f798fe6f8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -247,6 +247,11 @@ void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
if (DD) {
assert(OutStreamer->hasRawTextSupport() &&
"Expected assembly output mode.");
+ // This is NVPTX specific and it's unclear why.
+ // PR51079: If we have code without debug information we need to give up.
+ DISubprogram *MFSP = MF.getFunction().getSubprogram();
+ if (!MFSP)
+ return;
(void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
}
}
@@ -2477,7 +2482,8 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// two boundary. If a global value is specified, and if that global has
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
-void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
+void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV,
+ unsigned MaxBytesToEmit) const {
if (GV)
Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
@@ -2490,9 +2496,9 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
STI = &getSubtargetInfo();
else
STI = TM.getMCSubtargetInfo();
- OutStreamer->emitCodeAlignment(Alignment.value(), STI);
+ OutStreamer->emitCodeAlignment(Alignment.value(), STI, MaxBytesToEmit);
} else
- OutStreamer->emitValueToAlignment(Alignment.value());
+ OutStreamer->emitValueToAlignment(Alignment.value(), 0, 1, MaxBytesToEmit);
}
//===----------------------------------------------------------------------===//
@@ -3286,7 +3292,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// Emit an alignment directive for this block, if needed.
const Align Alignment = MBB.getAlignment();
if (Alignment != Align(1))
- emitAlignment(Alignment);
+ emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment());
// Switch to a new section if this basic block must begin a section. The
// entry block is always placed in the function section and is handled
@@ -3648,6 +3654,12 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
OutStreamer->getContext().getDwarfFormat());
}
+dwarf::FormParams AsmPrinter::getDwarfFormParams() const {
+ return {getDwarfVersion(), uint8_t(getPointerSize()),
+ OutStreamer->getContext().getDwarfFormat(),
+ MAI->doesDwarfUseRelocationsAcrossSections()};
+}
+
unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
return dwarf::getUnitLengthFieldByteSize(
OutStreamer->getContext().getDwarfFormat());
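
[Note on the emitAlignment change above: the new MaxBytesToEmit parameter carries the same meaning as the optional "max bytes" operand of a .p2align directive, i.e. skip the alignment entirely if it would cost more padding than the limit allows. A minimal, self-contained sketch of that decision; the helper names here are made up for illustration and are not LLVM API.]

#include <cstdint>
#include <cstdio>

// Bytes of padding needed to raise Offset to the next multiple of Alignment.
static uint64_t paddingTo(uint64_t Offset, uint64_t Alignment) {
  uint64_t Rem = Offset % Alignment;
  return Rem ? Alignment - Rem : 0;
}

// With a MaxBytesToEmit limit (0 meaning "no limit"), the alignment is skipped
// when it would cost more padding than the limit allows.
static uint64_t alignedOffset(uint64_t Offset, uint64_t Alignment,
                              uint64_t MaxBytesToEmit) {
  uint64_t Pad = paddingTo(Offset, Alignment);
  if (MaxBytesToEmit && Pad > MaxBytesToEmit)
    return Offset; // too expensive, leave the block where it is
  return Offset + Pad;
}

int main() {
  std::printf("%llu\n", (unsigned long long)alignedOffset(50, 16, 0)); // 64
  std::printf("%llu\n", (unsigned long long)alignedOffset(50, 16, 8)); // 50
  std::printf("%llu\n", (unsigned long long)alignedOffset(60, 16, 8)); // 64
  return 0;
}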
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 5e7db1f2f76c..bd2c60eadd61 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -33,6 +33,7 @@ class ByteStreamer {
virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
unsigned PadTo = 0) = 0;
+ virtual unsigned emitDIERef(const DIE &D) = 0;
};
class APByteStreamer final : public ByteStreamer {
@@ -54,15 +55,24 @@ public:
AP.OutStreamer->AddComment(Comment);
AP.emitULEB128(DWord, nullptr, PadTo);
}
+ unsigned emitDIERef(const DIE &D) override {
+ uint64_t Offset = D.getOffset();
+ static constexpr unsigned ULEB128PadSize = 4;
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+ emitULEB128(Offset, "", ULEB128PadSize);
+ // Return how many comments to skip in DwarfDebug::emitDebugLocEntry to keep
+ // comments aligned with debug loc entries.
+ return ULEB128PadSize;
+ }
};
class HashingByteStreamer final : public ByteStreamer {
private:
DIEHash &Hash;
public:
- HashingByteStreamer(DIEHash &H) : Hash(H) {}
- void emitInt8(uint8_t Byte, const Twine &Comment) override {
- Hash.update(Byte);
+ HashingByteStreamer(DIEHash &H) : Hash(H) {}
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
+ Hash.update(Byte);
}
void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
Hash.addSLEB128(DWord);
@@ -71,6 +81,10 @@ class HashingByteStreamer final : public ByteStreamer {
unsigned PadTo) override {
Hash.addULEB128(DWord);
}
+ unsigned emitDIERef(const DIE &D) override {
+ Hash.hashRawTypeReference(D);
+ return 0; // Only used together with the APByteStreamer.
+ }
};
class BufferByteStreamer final : public ByteStreamer {
@@ -115,9 +129,15 @@ public:
// with each other.
for (size_t i = 1; i < Length; ++i)
Comments.push_back("");
-
}
}
+ unsigned emitDIERef(const DIE &D) override {
+ uint64_t Offset = D.getOffset();
+ static constexpr unsigned ULEB128PadSize = 4;
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+ emitULEB128(Offset, "", ULEB128PadSize);
+ return 0; // Only used together with the APByteStreamer.
+ }
};
}
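
[Context for emitDIERef above: DIE offsets are written as ULEB128 values padded to a fixed four bytes so the surrounding expression's size does not change when offsets are finalized. Each ULEB128 byte carries seven payload bits, so four bytes cover any offset below 1 << 28, which is exactly what the assert guards. A rough standalone sketch of such a padded encoder, not the LLVM implementation.]

#include <cassert>
#include <cstdint>
#include <vector>

// Encode Value as ULEB128, padded with continuation bytes so the result is
// always PadTo bytes long (a decoder sees the same value either way).
static std::vector<uint8_t> encodePaddedULEB128(uint64_t Value, unsigned PadTo) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Out.size() + 1 < PadTo)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  while (Out.size() < PadTo) // pad with 0x80, finish with a plain 0x00
    Out.push_back(Out.size() + 1 < PadTo ? 0x80 : 0x00);
  return Out;
}

int main() {
  constexpr unsigned ULEB128PadSize = 4; // same constant as emitDIERef above
  uint64_t Offset = 0x123456;
  assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
  assert(encodePaddedULEB128(Offset, ULEB128PadSize).size() == ULEB128PadSize);
  return 0;
}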
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index d621108408f0..52c74713551c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -68,6 +68,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -600,6 +601,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
return SourceLanguage::D;
case dwarf::DW_LANG_Swift:
return SourceLanguage::Swift;
+ case dwarf::DW_LANG_Rust:
+ return SourceLanguage::Rust;
default:
// There's no CodeView representation for this language, and CV doesn't
// have an "unknown" option for the language field, so we'll use MASM,
@@ -843,6 +846,12 @@ void CodeViewDebug::emitCompilerInformation() {
if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
}
+ using ArchType = llvm::Triple::ArchType;
+ ArchType Arch = Triple(MMI->getModule()->getTargetTriple()).getArch();
+ if (Asm->TM.Options.Hotpatch || Arch == ArchType::thumb ||
+ Arch == ArchType::aarch64) {
+ Flags |= static_cast<uint32_t>(CompileSym3Flags::HotPatch);
+ }
OS.AddComment("Flags and language");
OS.emitInt32(Flags);
@@ -857,8 +866,10 @@ void CodeViewDebug::emitCompilerInformation() {
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
- for (int N : FrontVer.Part)
+ for (int N : FrontVer.Part) {
+ N = std::min<int>(N, std::numeric_limits<uint16_t>::max());
OS.emitInt16(N);
+ }
// Some Microsoft tools, like Binscope, expect a backend version number of at
// least 8.something, so we'll coerce the LLVM version into a form that
@@ -885,6 +896,34 @@ static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable,
return TypeTable.writeLeafType(SIR);
}
+static std::string flattenCommandLine(ArrayRef<std::string> Args,
+ StringRef MainFilename) {
+ std::string FlatCmdLine;
+ raw_string_ostream OS(FlatCmdLine);
+ bool PrintedOneArg = false;
+ if (!StringRef(Args[0]).contains("-cc1")) {
+ llvm::sys::printArg(OS, "-cc1", /*Quote=*/true);
+ PrintedOneArg = true;
+ }
+ for (unsigned i = 0; i < Args.size(); i++) {
+ StringRef Arg = Args[i];
+ if (Arg.empty())
+ continue;
+ if (Arg == "-main-file-name" || Arg == "-o") {
+ i++; // Skip this argument and next one.
+ continue;
+ }
+ if (Arg.startswith("-object-file-name") || Arg == MainFilename)
+ continue;
+ if (PrintedOneArg)
+ OS << " ";
+ llvm::sys::printArg(OS, Arg, /*Quote=*/true);
+ PrintedOneArg = true;
+ }
+ OS.flush();
+ return FlatCmdLine;
+}
+
void CodeViewDebug::emitBuildInfo() {
// First, make LF_BUILDINFO. It's a sequence of strings with various bits of
// build info. The known prefix is:
@@ -905,8 +944,16 @@ void CodeViewDebug::emitBuildInfo() {
getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory());
BuildInfoArgs[BuildInfoRecord::SourceFile] =
getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename());
- // FIXME: Path to compiler and command line. PDB is intentionally blank unless
- // we implement /Zi type servers.
+ // FIXME: PDB is intentionally blank unless we implement /Zi type servers.
+ BuildInfoArgs[BuildInfoRecord::TypeServerPDB] =
+ getStringIdTypeIdx(TypeTable, "");
+ if (Asm->TM.Options.MCOptions.Argv0 != nullptr) {
+ BuildInfoArgs[BuildInfoRecord::BuildTool] =
+ getStringIdTypeIdx(TypeTable, Asm->TM.Options.MCOptions.Argv0);
+ BuildInfoArgs[BuildInfoRecord::CommandLine] = getStringIdTypeIdx(
+ TypeTable, flattenCommandLine(Asm->TM.Options.MCOptions.CommandLineArgs,
+ MainSourceFile->getFilename()));
+ }
BuildInfoRecord BIR(BuildInfoArgs);
TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR);
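
[Quick illustration of what flattenCommandLine above records in the LF_BUILDINFO command-line entry: the -cc1 argument list minus -main-file-name/-o and their values, minus -object-file-name* and the main source file, with -cc1 prepended if missing. A simplified standalone sketch; quoting via llvm::sys::printArg is reduced to plain concatenation here.]

#include <iostream>
#include <string>
#include <vector>

static std::string flattenForBuildInfo(const std::vector<std::string> &Args,
                                       const std::string &MainFilename) {
  std::string Flat;
  bool PrintedOneArg = false;
  auto Append = [&](const std::string &A) {
    if (PrintedOneArg)
      Flat += ' ';
    Flat += A;
    PrintedOneArg = true;
  };
  if (Args[0].find("-cc1") == std::string::npos)
    Append("-cc1"); // record a cc1-style line even when given driver args
  for (size_t I = 0; I < Args.size(); ++I) {
    const std::string &Arg = Args[I];
    if (Arg.empty())
      continue;
    if (Arg == "-main-file-name" || Arg == "-o") {
      ++I; // drop the flag together with its value
      continue;
    }
    if (Arg.rfind("-object-file-name", 0) == 0 || Arg == MainFilename)
      continue;
    Append(Arg);
  }
  return Flat;
}

int main() {
  std::vector<std::string> Args = {
      "-cc1", "-triple", "x86_64-pc-windows-msvc", "-main-file-name", "a.cpp",
      "-o",   "a.obj",   "-object-file-name=a.obj", "a.cpp"};
  // Prints: -cc1 -triple x86_64-pc-windows-msvc
  std::cout << flattenForBuildInfo(Args, "a.cpp") << "\n";
  return 0;
}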
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 2834d9c3ebbf..1a0256f30d41 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -274,7 +274,7 @@ LLVM_DUMP_METHOD void DIE::dump() const {
}
#endif
-unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
+unsigned DIE::computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams,
DIEAbbrevSet &AbbrevSet,
unsigned CUOffset) {
// Unique the abbreviation and fill in the abbreviation number so this DIE
@@ -289,7 +289,7 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
// Add the byte size of all the DIE attribute values.
for (const auto &V : values())
- CUOffset += V.SizeOf(AP);
+ CUOffset += V.sizeOf(FormParams);
// Let the children compute their offsets and abbreviation numbers.
if (hasChildren()) {
@@ -297,7 +297,8 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
assert(Abbrev.hasChildren() && "Children flag not set");
for (auto &Child : children())
- CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset);
+ CUOffset =
+ Child.computeOffsetsAndAbbrevs(FormParams, AbbrevSet, CUOffset);
// Each child chain is terminated with a zero byte, adjust the offset.
CUOffset += sizeof(int8_t);
@@ -335,13 +336,13 @@ void DIEValue::emitValue(const AsmPrinter *AP) const {
}
}
-unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
+unsigned DIEValue::sizeOf(const dwarf::FormParams &FormParams) const {
switch (Ty) {
case isNone:
llvm_unreachable("Expected valid DIEValue");
#define HANDLE_DIEVALUE(T) \
case is##T: \
- return getDIE##T().SizeOf(AP, Form);
+ return getDIE##T().sizeOf(FormParams, Form);
#include "llvm/CodeGen/DIEValue.def"
}
llvm_unreachable("Unknown DIE kind");
@@ -407,7 +408,8 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_strp_sup:
case dwarf::DW_FORM_addr:
case dwarf::DW_FORM_ref_addr:
- Asm->OutStreamer->emitIntValue(Integer, SizeOf(Asm, Form));
+ Asm->OutStreamer->emitIntValue(Integer,
+ sizeOf(Asm->getDwarfFormParams(), Form));
return;
case dwarf::DW_FORM_GNU_str_index:
case dwarf::DW_FORM_GNU_addr_index:
@@ -425,15 +427,12 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
}
}
-/// SizeOf - Determine size of integer value in bytes.
+/// sizeOf - Determine size of integer value in bytes.
///
-unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- assert(AP && "AsmPrinter is required to set FormParams");
- dwarf::FormParams Params = {AP->getDwarfVersion(),
- uint8_t(AP->getPointerSize()),
- AP->OutStreamer->getContext().getDwarfFormat()};
-
- if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params))
+unsigned DIEInteger::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ if (Optional<uint8_t> FixedSize =
+ dwarf::getFixedFormByteSize(Form, FormParams))
return *FixedSize;
switch (Form) {
@@ -464,19 +463,20 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->emitDebugValue(Expr, SizeOf(AP, Form));
+ AP->emitDebugValue(Expr, sizeOf(AP->getDwarfFormParams(), Form));
}
/// SizeOf - Determine size of expression value in bytes.
///
-unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEExpr::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_data4:
return 4;
case dwarf::DW_FORM_data8:
return 8;
case dwarf::DW_FORM_sec_offset:
- return AP->getDwarfOffsetByteSize();
+ return FormParams.getDwarfOffsetByteSize();
default:
llvm_unreachable("DIE Value form not supported yet");
}
@@ -493,12 +493,14 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
///
void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
bool IsSectionRelative = Form != dwarf::DW_FORM_addr;
- AP->emitLabelReference(Label, SizeOf(AP, Form), IsSectionRelative);
+ AP->emitLabelReference(Label, sizeOf(AP->getDwarfFormParams(), Form),
+ IsSectionRelative);
}
-/// SizeOf - Determine size of label value in bytes.
+/// sizeOf - Determine size of label value in bytes.
///
-unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELabel::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_data4:
return 4;
@@ -506,9 +508,9 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return 8;
case dwarf::DW_FORM_sec_offset:
case dwarf::DW_FORM_strp:
- return AP->getDwarfOffsetByteSize();
+ return FormParams.getDwarfOffsetByteSize();
case dwarf::DW_FORM_addr:
- return AP->MAI->getCodePointerSize();
+ return FormParams.AddrSize;
default:
llvm_unreachable("DIE Value form not supported yet");
}
@@ -527,7 +529,7 @@ void DIEBaseTypeRef::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->emitULEB128(Offset, nullptr, ULEB128PadSize);
}
-unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBaseTypeRef::sizeOf(const dwarf::FormParams &, dwarf::Form) const {
return ULEB128PadSize;
}
@@ -541,19 +543,21 @@ void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index
/// EmitValue - Emit delta value.
///
void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->emitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+ AP->emitLabelDifference(LabelHi, LabelLo,
+ sizeOf(AP->getDwarfFormParams(), Form));
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEDelta::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_data4:
return 4;
case dwarf::DW_FORM_data8:
return 8;
case dwarf::DW_FORM_sec_offset:
- return AP->getDwarfOffsetByteSize();
+ return FormParams.getDwarfOffsetByteSize();
default:
llvm_unreachable("DIE Value form not supported yet");
}
@@ -592,9 +596,10 @@ void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
}
}
-/// SizeOf - Determine size of delta value in bytes.
+/// sizeOf - Determine size of delta value in bytes.
///
-unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEString::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
// Index of string in symbol table.
switch (Form) {
case dwarf::DW_FORM_GNU_str_index:
@@ -603,11 +608,11 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_strx3:
case dwarf::DW_FORM_strx4:
- return DIEInteger(S.getIndex()).SizeOf(AP, Form);
+ return DIEInteger(S.getIndex()).sizeOf(FormParams, Form);
case dwarf::DW_FORM_strp:
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
- return DIELabel(S.getSymbol()).SizeOf(AP, Form);
- return DIEInteger(S.getOffset()).SizeOf(AP, Form);
+ if (FormParams.DwarfUsesRelocationsAcrossSections)
+ return DIELabel(S.getSymbol()).sizeOf(FormParams, Form);
+ return DIEInteger(S.getOffset()).sizeOf(FormParams, Form);
default:
llvm_unreachable("Expected valid string form");
}
@@ -630,7 +635,7 @@ void DIEInlineString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
llvm_unreachable("Expected valid string form");
}
-unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEInlineString::sizeOf(const dwarf::FormParams &, dwarf::Form) const {
// Emit string bytes + NULL byte.
return S.size() + 1;
}
@@ -653,7 +658,8 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_ref8:
- AP->OutStreamer->emitIntValue(Entry->getOffset(), SizeOf(AP, Form));
+ AP->OutStreamer->emitIntValue(Entry->getOffset(),
+ sizeOf(AP->getDwarfFormParams(), Form));
return;
case dwarf::DW_FORM_ref_udata:
@@ -665,11 +671,12 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
uint64_t Addr = Entry->getDebugSectionOffset();
if (const MCSymbol *SectionSym =
Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
- AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
+ AP->emitLabelPlusOffset(SectionSym, Addr,
+ sizeOf(AP->getDwarfFormParams(), Form), true);
return;
}
- AP->OutStreamer->emitIntValue(Addr, SizeOf(AP, Form));
+ AP->OutStreamer->emitIntValue(Addr, sizeOf(AP->getDwarfFormParams(), Form));
return;
}
default:
@@ -677,7 +684,8 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
}
}
-unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEEntry::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_ref1:
return 1;
@@ -690,15 +698,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref_udata:
return getULEB128Size(Entry->getOffset());
case dwarf::DW_FORM_ref_addr:
- if (AP->getDwarfVersion() == 2)
- return AP->MAI->getCodePointerSize();
- switch (AP->OutStreamer->getContext().getDwarfFormat()) {
- case dwarf::DWARF32:
- return 4;
- case dwarf::DWARF64:
- return 8;
- }
- llvm_unreachable("Invalid DWARF format");
+ return FormParams.getRefAddrByteSize();
default:
llvm_unreachable("Improper form for DIE reference");
@@ -714,12 +714,10 @@ void DIEEntry::print(raw_ostream &O) const {
// DIELoc Implementation
//===----------------------------------------------------------------------===//
-/// ComputeSize - calculate the size of the location expression.
-///
-unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
+unsigned DIELoc::computeSize(const dwarf::FormParams &FormParams) const {
if (!Size) {
for (const auto &V : values())
- Size += V.SizeOf(AP);
+ Size += V.sizeOf(FormParams);
}
return Size;
@@ -743,9 +741,9 @@ void DIELoc::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
V.emitValue(Asm);
}
-/// SizeOf - Determine size of location data in bytes.
+/// sizeOf - Determine size of location data in bytes.
///
-unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELoc::sizeOf(const dwarf::FormParams &, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -766,12 +764,10 @@ void DIELoc::print(raw_ostream &O) const {
// DIEBlock Implementation
//===----------------------------------------------------------------------===//
-/// ComputeSize - calculate the size of the block.
-///
-unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
+unsigned DIEBlock::computeSize(const dwarf::FormParams &FormParams) const {
if (!Size) {
for (const auto &V : values())
- Size += V.SizeOf(AP);
+ Size += V.sizeOf(FormParams);
}
return Size;
@@ -797,9 +793,9 @@ void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
V.emitValue(Asm);
}
-/// SizeOf - Determine size of block data in bytes.
+/// sizeOf - Determine size of block data in bytes.
///
-unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBlock::sizeOf(const dwarf::FormParams &, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -820,22 +816,23 @@ void DIEBlock::print(raw_ostream &O) const {
// DIELocList Implementation
//===----------------------------------------------------------------------===//
-unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELocList::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_loclistx:
return getULEB128Size(Index);
case dwarf::DW_FORM_data4:
- assert(!AP->isDwarf64() &&
+ assert(FormParams.Format != dwarf::DWARF64 &&
"DW_FORM_data4 is not suitable to emit a pointer to a location list "
"in the 64-bit DWARF format");
return 4;
case dwarf::DW_FORM_data8:
- assert(AP->isDwarf64() &&
+ assert(FormParams.Format == dwarf::DWARF64 &&
"DW_FORM_data8 is not suitable to emit a pointer to a location list "
"in the 32-bit DWARF format");
return 8;
case dwarf::DW_FORM_sec_offset:
- return AP->getDwarfOffsetByteSize();
+ return FormParams.getDwarfOffsetByteSize();
default:
llvm_unreachable("DIE Value form not supported yet");
}
@@ -860,9 +857,10 @@ void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
// DIEAddrOffset Implementation
//===----------------------------------------------------------------------===//
-unsigned DIEAddrOffset::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- return Addr.SizeOf(AP, dwarf::DW_FORM_addrx) +
- Offset.SizeOf(AP, dwarf::DW_FORM_data4);
+unsigned DIEAddrOffset::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form) const {
+ return Addr.sizeOf(FormParams, dwarf::DW_FORM_addrx) +
+ Offset.sizeOf(FormParams, dwarf::DW_FORM_data4);
}
/// EmitValue - Emit label value.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5f4ee747fcca..e175854f7b93 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -207,6 +207,18 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
computeHash(Entry);
}
+void DIEHash::hashRawTypeReference(const DIE &Entry) {
+ unsigned &DieNumber = Numbering[&Entry];
+ if (DieNumber) {
+ addULEB128('R');
+ addULEB128(DieNumber);
+ return;
+ }
+ DieNumber = Numbering.size();
+ addULEB128('T');
+ computeHash(Entry);
+}
+
// Hash all of the values in a block like set of values. This assumes that
// all of the data is going to be added as integers.
void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
@@ -298,10 +310,10 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
addULEB128(Attribute);
addULEB128(dwarf::DW_FORM_block);
if (Value.getType() == DIEValue::isBlock) {
- addULEB128(Value.getDIEBlock().ComputeSize(AP));
+ addULEB128(Value.getDIEBlock().computeSize(AP->getDwarfFormParams()));
hashBlockData(Value.getDIEBlock().values());
} else if (Value.getType() == DIEValue::isLoc) {
- addULEB128(Value.getDIELoc().ComputeSize(AP));
+ addULEB128(Value.getDIELoc().computeSize(AP->getDwarfFormParams()));
hashBlockData(Value.getDIELoc().values());
} else {
// We could add the block length, but that would take
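
[The hashRawTypeReference hunk above follows the back-reference scheme DIEHash already uses for type signatures: a DIE seen for the first time is numbered and hashed in full behind a 'T' marker, while later references hash only 'R' plus that number, which keeps hashing of cyclic type graphs finite. A toy sketch of the scheme; illustration only, not the DIEHash data structures.]

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Node {
  std::string Name;
  std::vector<const Node *> Refs;
};

static void hashNode(const Node &N, std::map<const Node *, unsigned> &Numbering,
                     std::string &Out) {
  unsigned &Num = Numbering[&N];
  if (Num) { // already visited: emit a back reference instead of recursing
    Out += "R" + std::to_string(Num);
    return;
  }
  Num = Numbering.size(); // first visit: number it, then hash its content
  Out += "T" + N.Name;
  for (const Node *Ref : N.Refs)
    hashNode(*Ref, Numbering, Out);
}

int main() {
  Node A{"A", {}}, B{"B", {}};
  A.Refs = {&B};
  B.Refs = {&A}; // cycle: A -> B -> A
  std::map<const Node *, unsigned> Numbering;
  std::string Out;
  hashNode(A, Numbering, Out);
  std::printf("%s\n", Out.c_str()); // prints "TATBR1"
  return 0;
}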
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 29e1da4c5d60..24a973b39271 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -62,6 +62,8 @@ public:
/// Encodes and adds \param Value to the hash as a SLEB128.
void addSLEB128(int64_t Value);
+ void hashRawTypeReference(const DIE &Entry);
+
private:
/// Adds \param Str to the hash and includes a NULL byte.
void addString(StringRef Str);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 4df34d2c9402..18fc46c74eb4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -155,7 +155,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type)
+ Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type &&
+ Tag != dwarf::DW_TAG_immutable_type)
return DDTy->getSizeInBits();
DIType *BaseType = DDTy->getBaseType();
@@ -210,7 +211,8 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
+ T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type ||
+ T == dwarf::DW_TAG_immutable_type);
assert(DTy->getBaseType() && "Expected valid base type");
return isUnsignedDIType(DTy->getBaseType());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 9b73f0ab2f05..5913c687db48 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -127,9 +127,14 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
if (!File)
return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
CUID);
- return Asm->OutStreamer->emitDwarfFileDirective(
- 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
- File->getSource(), CUID);
+
+ if (LastFile != File) {
+ LastFile = File;
+ LastFileID = Asm->OutStreamer->emitDwarfFileDirective(
+ 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+ File->getSource(), CUID);
+ }
+ return LastFileID;
}
DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
@@ -260,9 +265,20 @@ void DwarfCompileUnit::addLocationAttribute(
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
+ // 16-bit platforms like MSP430 and AVR take this path, so sink this
+ // assert to platforms that use it.
+ auto GetPointerSizedFormAndOp = [this]() {
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ struct FormAndOp {
+ dwarf::Form Form;
+ dwarf::LocationAtom Op;
+ };
+ return PointerSize == 4
+ ? FormAndOp{dwarf::DW_FORM_data4, dwarf::DW_OP_const4u}
+ : FormAndOp{dwarf::DW_FORM_data8, dwarf::DW_OP_const8u};
+ };
if (Global->isThreadLocal()) {
if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
@@ -270,15 +286,12 @@ void DwarfCompileUnit::addLocationAttribute(
// FIXME: Make this work with -gsplit-dwarf.
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
+ auto FormAndOp = GetPointerSizedFormAndOp();
// 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
+ addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op);
// 2) containing the (relocated) offset of the TLS variable
// within the module's TLS block.
- addExpr(*Loc,
- PointerSize == 4 ? dwarf::DW_FORM_data4
- : dwarf::DW_FORM_data8,
+ addExpr(*Loc, FormAndOp.Form,
Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
} else {
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
@@ -292,13 +305,11 @@ void DwarfCompileUnit::addLocationAttribute(
}
} else if (Asm->TM.getRelocationModel() == Reloc::RWPI ||
Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) {
+ auto FormAndOp = GetPointerSizedFormAndOp();
// Constant
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
+ addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op);
// Relocation offset
- addExpr(*Loc, PointerSize == 4 ? dwarf::DW_FORM_data4
- : dwarf::DW_FORM_data8,
+ addExpr(*Loc, FormAndOp.Form,
Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym));
// Base register
Register BaseReg = Asm->getObjFileLowering().getStaticBase();
@@ -1575,7 +1586,8 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
"_" + Twine(Btr.BitSize)).toStringRef(Str));
addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
- addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8);
+ // Round up to smallest number of bytes that contains this number of bits.
+ addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8));
Btr.Die = &Die;
}
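
[A small note on the DW_AT_byte_size change at the end of the hunk above: the old BitSize / 8 truncates, so a base type narrower than a byte would be described with a byte size of 0, while divideCeil rounds up. A tiny check of that arithmetic; divideCeil is re-declared locally here just for the illustration.]

#include <cassert>
#include <cstdint>

// Local stand-in for llvm::divideCeil, shown only for the arithmetic.
static uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
  return (Numerator + Denominator - 1) / Denominator;
}

int main() {
  uint64_t BitSize = 1;                // e.g. a 1-bit boolean base type
  assert(BitSize / 8 == 0);            // old: byte size truncated to zero
  assert(divideCeil(BitSize, 8) == 1); // new: rounded up to one byte
  assert(divideCeil(32, 8) == 4);      // whole-byte sizes are unchanged
  return 0;
}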
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index fb03982b5e4a..f2e1f6346803 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -86,6 +86,9 @@ class DwarfCompileUnit final : public DwarfUnit {
/// DWO ID for correlating skeleton and split units.
uint64_t DWOId = 0;
+ const DIFile *LastFile = nullptr;
+ unsigned LastFileID;
+
/// Construct a DIE for the given DbgVariable without initializing the
/// DbgVariable's DIE reference.
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 48134f1fd774..680b9586228f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2539,12 +2539,10 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
if (Op.getDescription().Op[I] == Encoding::SizeNA)
continue;
if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
- uint64_t Offset =
- CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
- assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
- Streamer.emitULEB128(Offset, "", ULEB128PadSize);
+ unsigned Length =
+ Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die);
// Make sure comments stay aligned.
- for (unsigned J = 0; J < ULEB128PadSize; ++J)
+ for (unsigned J = 0; J < Length; ++J)
if (Comment != End)
Comment++;
} else {
@@ -3369,7 +3367,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Fast path if we're building some type units and one has already used the
// address pool we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
- if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
+ if (!TypeUnitsUnderConstruction.empty() &&
+ (AddrPool.hasBeenUsed() || SeenLocalType))
return;
auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
@@ -3380,6 +3379,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
+ SeenLocalType = false;
auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
@@ -3423,7 +3423,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Types referencing entries in the address table cannot be placed in type
// units.
- if (AddrPool.hasBeenUsed()) {
+ if (AddrPool.hasBeenUsed() || SeenLocalType) {
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
@@ -3451,14 +3451,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
+ TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)),
+ AddrPoolUsed(DD->AddrPool.hasBeenUsed()),
+ SeenLocalType(DD->SeenLocalType) {
DD->TypeUnitsUnderConstruction.clear();
DD->AddrPool.resetUsedFlag();
+ DD->SeenLocalType = false;
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
DD->AddrPool.resetUsedFlag(AddrPoolUsed);
+ DD->SeenLocalType = SeenLocalType;
}
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 4e1a1b1e068d..0043000652e8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -433,6 +433,7 @@ private:
DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
AddressPool AddrPool;
+ bool SeenLocalType = false;
/// Accelerator tables.
AccelTable<DWARF5AccelTableData> AccelDebugNames;
@@ -671,6 +672,7 @@ public:
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
bool AddrPoolUsed;
+ bool SeenLocalType;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
@@ -679,6 +681,7 @@ public:
};
NonTypeUnitContext enterNonTypeUnitContext();
+ void seenLocalType() { SeenLocalType = true; }
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 40898c9fc855..4defa8a30855 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -98,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
public:
AIXException(AsmPrinter *A);
+ void markFunctionEnd() override;
+
void endModule() override {}
void beginFunction(const MachineFunction *MF) override {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 37407c98e75f..ee932d105107 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,9 +681,25 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
}
void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
- // (X & (1 << FromBits - 1))
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned((1ULL << FromBits) - 1);
+ // Heuristic to decide the most efficient encoding.
+ // A ULEB can encode 7 1-bits per byte.
+ if (FromBits / 7 < 1+1+1+1+1) {
+ // (X & (1 << FromBits - 1))
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned((1ULL << FromBits) - 1);
+ } else {
+ // Note that the DWARF 4 stack consists of pointer-sized elements,
+ // so technically it doesn't make sense to shift left more than 64
+ // bits. We leave that for the consumer to decide though. LLDB for
+ // example uses APInt for the stack elements and can still deal
+ // with this.
+ emitOp(dwarf::DW_OP_lit1);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits);
+ emitOp(dwarf::DW_OP_shl);
+ emitOp(dwarf::DW_OP_lit1);
+ emitOp(dwarf::DW_OP_minus);
+ }
emitOp(dwarf::DW_OP_and);
}
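
[To make the emitLegacyZExt heuristic above concrete: both encodings end with DW_OP_and, so the comparison is between the ULEB128 bytes of the all-ones mask (roughly FromBits / 7, since ULEB128 carries seven bits per byte) and the five fixed opcodes of the lit1/constu/shl/lit1/minus sequence plus its small FromBits operand. A back-of-the-envelope sketch of that size comparison; assumed byte counts, not the emitter itself.]

#include <cstdint>
#include <cstdio>

// Bytes needed to encode V as ULEB128 (seven payload bits per byte).
static unsigned ulebSize(uint64_t V) {
  unsigned N = 0;
  do { ++N; V >>= 7; } while (V);
  return N;
}

int main() {
  for (unsigned FromBits : {8u, 16u, 32u, 40u, 64u}) {
    // Variant A: DW_OP_constu followed by the (FromBits)-ones mask, whose
    // ULEB128 encoding needs ceil(FromBits / 7) bytes.
    unsigned MaskVariant = 1 + (FromBits + 6) / 7;
    // Variant B: DW_OP_lit1, DW_OP_constu <FromBits>, DW_OP_shl, DW_OP_lit1,
    // DW_OP_minus -- five opcode bytes plus the small FromBits operand.
    unsigned ShiftVariant = 5 + ulebSize(FromBits);
    std::printf("FromBits=%2u  mask variant=%u bytes  shift variant=%u bytes\n",
                FromBits, MaskVariant, ShiftVariant);
  }
  return 0;
}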
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 838e1c9a10be..a67d0f032cf6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -92,7 +92,8 @@ unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
// Compute the size and offset of a DIE. The offset is relative to start of the
// CU. It returns the offset after laying out the DIE.
unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
- return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset);
+ return Die.computeOffsetsAndAbbrevs(Asm->getDwarfFormParams(), Abbrevs,
+ Offset);
}
void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 6b6d63f14f87..15d90c54adfc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -77,7 +77,7 @@ void DIEDwarfExpression::enableTemporaryBuffer() {
void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
unsigned DIEDwarfExpression::getTemporaryBufferSize() {
- return TmpDIE.ComputeSize(&AP);
+ return TmpDIE.computeSize(AP.getDwarfFormParams());
}
void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
@@ -394,14 +394,14 @@ DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) {
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
- Loc->ComputeSize(Asm);
+ Loc->computeSize(Asm->getDwarfFormParams());
DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
DIEBlock *Block) {
- Block->ComputeSize(Asm);
+ Block->computeSize(Asm->getDwarfFormParams());
DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
addAttribute(Die, Attribute, Form, Block);
}
@@ -597,10 +597,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- else {
- auto X = DD->enterNonTypeUnitContext();
+ else
finishNonUnitTypeDIE(TyDIE, CTy);
- }
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
@@ -744,6 +742,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
}
+ if (DIExpression *Expr = STy->getStringLocationExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ // This is to describe the memory location of the
+ // string, so lock it down as such.
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
+ }
+
if (STy->getEncoding()) {
// For eventual Unicode support.
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
@@ -1189,7 +1197,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
DefinitionArgs = SP->getType()->getTypeArray();
if (DeclArgs.size() && DefinitionArgs.size())
- if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0])
+ if (DefinitionArgs[0] != nullptr && DeclArgs[0] != DefinitionArgs[0])
addType(SPDie, DefinitionArgs[0]);
DeclDie = getDIE(SPDecl);
@@ -1842,5 +1850,25 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
StringRef Name = CTy->getName();
if (!Name.empty())
addString(D, dwarf::DW_AT_name, Name);
+ if (Name.startswith("_STN") || !Name.contains('<'))
+ addTemplateParams(D, CTy->getTemplateParams());
+ // If the type is in an anonymous namespace, we can't reference it from a TU
+ // (since the type would be CU local and the TU doesn't specify which TU has
+ // the appropriate type definition) - so flag this emission as such and skip
+ // the rest of the emission now since we're going to throw out all this work
+ // and put the outer/referencing type in the CU instead.
+ // FIXME: Probably good to generalize this to a DICompositeType flag populated
+ // by the frontend, then we could use that to have types that can have
+ // decl+def merged by LTO but where the definition still doesn't go in a type
+ // unit because the type has only one definition.
+ for (DIScope *S = CTy->getScope(); S; S = S->getScope()) {
+ if (auto *NS = dyn_cast<DINamespace>(S)) {
+ if (NS->getName().empty()) {
+ DD->seenLocalType();
+ break;
+ }
+ }
+ }
+ auto X = DD->enterNonTypeUnitContext();
getCU().createTypeDIE(CTy);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 54b0079dd7ce..330f3bacca43 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -25,9 +25,7 @@ namespace llvm {
class ConstantFP;
class ConstantInt;
-class DbgVariable;
class DwarfCompileUnit;
-class MachineOperand;
class MCDwarfDwoLineTable;
class MCSymbol;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
index 7d5e51218693..a92a89084cad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -19,8 +19,6 @@
namespace llvm {
class AsmPrinter;
-class MCStreamer;
-class Module;
class DILocation;
class PseudoProbeHandler : public AsmPrinterHandler {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 1e3f33e70715..ad8432343a60 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -27,7 +27,7 @@
using namespace llvm;
-WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {}
+WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {}
WinCFGuard::~WinCFGuard() {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 2a4ea92a92aa..95d5dcfbbd0f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -23,7 +23,6 @@ class BasicBlock;
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineLoopInfo;
-class MachineModuleInfo;
class MachineRegisterInfo;
class MBFIWrapper;
class ProfileSummaryInfo;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 1c2e3f998449..de173a9dfd62 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -347,7 +347,7 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
}
if (ForceFullCFA) {
- MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMoves(
+ MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMovesFullCFA(
*MBBInfo.MBB, MBBI);
InsertedCFIInstr = true;
PrevMBBInfo = &MBBInfo;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 5f9982cd155d..84a0e4142bb6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -43,9 +43,9 @@ void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
}
// Return the preferred allocation register for reg, given a COPY instruction.
-static Register copyHint(const MachineInstr *MI, unsigned Reg,
- const TargetRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) {
+Register VirtRegAuxInfo::copyHint(const MachineInstr *MI, unsigned Reg,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
unsigned Sub, HSub;
Register HReg;
if (MI->getOperand(0).getReg() == Reg) {
@@ -77,9 +77,10 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg,
}
// Check if all values in LI are rematerializable
-static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
- const VirtRegMap &VRM,
- const TargetInstrInfo &TII) {
+bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ const VirtRegMap &VRM,
+ const TargetInstrInfo &TII) {
Register Reg = LI.reg();
Register Original = VRM.getOriginal(Reg);
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 747f4e4fdecc..28f24e5ea908 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4168,11 +4168,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// We can get through binary operator, if it is legal. In other words, the
// binary operator must have a nuw or nsw flag.
- const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
- if (isa_and_nonnull<OverflowingBinaryOperator>(BinOp) &&
- ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
- (IsSExt && BinOp->hasNoSignedWrap())))
- return true;
+ if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
+ if (isa<OverflowingBinaryOperator>(BinOp) &&
+ ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+ (IsSExt && BinOp->hasNoSignedWrap())))
+ return true;
// ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
if ((Inst->getOpcode() == Instruction::And ||
@@ -4181,10 +4181,10 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
if (Inst->getOpcode() == Instruction::Xor) {
- const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
// Make sure it is not a NOT.
- if (Cst && !Cst->getValue().isAllOnes())
- return true;
+ if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
+ if (!Cst->getValue().isAllOnes())
+ return true;
}
// zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 3bed81d5841d..1d50e1d22b95 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -90,7 +90,6 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT_EXP(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
@@ -433,12 +432,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
- static cl::opt<bool> ValueTrackingVariableLocations(
- "experimental-debug-variable-locations",
- cl::desc("Use experimental new value-tracking variable locations"),
- cl::init(false));
- CGBINDOPT(ValueTrackingVariableLocations);
-
static cl::opt<bool> EnableMachineFunctionSplitter(
"split-machine-functions",
cl::desc("Split out cold basic blocks from machine functions based on "
@@ -539,12 +532,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
- if (auto Opt = getExplicitValueTrackingVariableLocations())
- Options.ValueTrackingVariableLocations = *Opt;
- else
- Options.ValueTrackingVariableLocations =
- getDefaultValueTrackingVariableLocations(TheTriple);
-
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
@@ -620,7 +607,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
Function &F) {
auto &Ctx = F.getContext();
AttributeList Attrs = F.getAttributes();
- AttrBuilder NewAttrs;
+ AttrBuilder NewAttrs(Ctx);
if (!CPU.empty() && !F.hasFnAttribute("target-cpu"))
NewAttrs.addAttribute("target-cpu", CPU);
@@ -698,8 +685,3 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
setFunctionAttributes(CPU, Features, F);
}
-bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) {
- if (T.getArch() == llvm::Triple::x86_64)
- return true;
- return false;
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 901409ea9f8f..eb2d449bc4af 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -40,8 +40,7 @@ using namespace llvm;
CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
const RegisterClassInfo &RCI)
- : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
- TII(MF.getSubtarget().getInstrInfo()),
+ : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()),
TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 7300ea6b50ee..d9caa8ad42d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -68,9 +68,16 @@ void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
- for (const MachineOperand &MO : MI->implicit_operands())
- if (MO.isReg())
- CopyMI->addOperand(MO);
+ Register DstReg = MI->getOperand(0).getReg();
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ CopyMI->addOperand(MO);
+
+ // Be conservative about preserving kills when subregister defs are
+ // involved. If there was an implicit kill of a super-register overlapping
+ // the copy result, we would kill the subregisters that previous copies defined.
+ if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
+ CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
+ }
}
bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 2676becdd807..1a642e233a6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -191,10 +191,10 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid dsts");
if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
// Try to constant fold vector constants.
- auto VecCst = ConstantFoldVectorBinop(
+ Register VecCst = ConstantFoldVectorBinop(
Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
if (VecCst)
- return MachineInstrBuilder(getMF(), *VecCst);
+ return buildCopy(DstOps[0], VecCst);
break;
}
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d061664e8c5d..1ec7868f2234 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -86,6 +86,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
CallLoweringInfo Info;
const DataLayout &DL = MIRBuilder.getDataLayout();
MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
bool CanBeTailCalled = CB.isTailCall() &&
isInTailCallPosition(CB, MF.getTarget()) &&
(MF.getFunction()
@@ -109,6 +110,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
CanBeTailCalled = false;
}
+
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
@@ -136,10 +138,23 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+ Register ReturnHintAlignReg;
+ Align ReturnHintAlign;
+
Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
- if (!Info.OrigRet.Ty->isVoidTy())
+
+ if (!Info.OrigRet.Ty->isVoidTy()) {
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
+ if (MaybeAlign Alignment = CB.getRetAlign()) {
+ if (*Alignment > Align(1)) {
+ ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
+ Info.OrigRet.Regs[0] = ReturnHintAlignReg;
+ ReturnHintAlign = *Alignment;
+ }
+ }
+ }
+
Info.CB = &CB;
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
@@ -147,7 +162,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
Info.IsMustTailCall = CB.isMustTailCall();
Info.IsTailCall = CanBeTailCalled;
Info.IsVarArg = IsVarArg;
- return lowerCall(MIRBuilder, Info);
+ if (!lowerCall(MIRBuilder, Info))
+ return false;
+
+ if (ReturnHintAlignReg && !Info.IsTailCall) {
+ MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
+ ReturnHintAlign);
+ }
+
+ return true;
}
template <typename FuncInfoTy>
@@ -509,7 +532,8 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
bool CallLowering::determineAndHandleAssignments(
ValueHandler &Handler, ValueAssigner &Assigner,
SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
- CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const {
+ CallingConv::ID CallConv, bool IsVarArg,
+ ArrayRef<Register> ThisReturnRegs) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -519,7 +543,7 @@ bool CallLowering::determineAndHandleAssignments(
return false;
return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
- ThisReturnReg);
+ ThisReturnRegs);
}
static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
@@ -596,7 +620,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
CCState &CCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs,
MachineIRBuilder &MIRBuilder,
- Register ThisReturnReg) const {
+ ArrayRef<Register> ThisReturnRegs) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
@@ -740,10 +764,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
assert(!VA.needsCustom() && "custom loc should have been handled already");
- if (i == 0 && ThisReturnReg.isValid() &&
+ if (i == 0 && !ThisReturnRegs.empty() &&
Handler.isIncomingArgumentHandler() &&
isTypeIsValidForThisReturn(ValVT)) {
- Handler.assignValueToReg(Args[i].Regs[i], ThisReturnReg, VA);
+ Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index dd1ef74e8ad0..30f8838805b5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -56,8 +56,7 @@ class WorkListMaintainer : public GISelChangeObserver {
SmallPtrSet<const MachineInstr *, 4> CreatedInstrs;
public:
- WorkListMaintainer(WorkListTy &WorkList)
- : GISelChangeObserver(), WorkList(WorkList) {}
+ WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
virtual ~WorkListMaintainer() {
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index f7a634dad61a..d6a009744161 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1748,6 +1748,20 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchCombineUnmergeUndef(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MatchInfo = [&MI](MachineIRBuilder &B) {
+ unsigned NumElems = MI.getNumOperands() - 1;
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ B.buildUndef(DstReg);
+ }
+ };
+ return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
+}
+
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
@@ -2025,16 +2039,19 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
}
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
- int64_t &NewCst) {
+ APInt &NewCst) {
auto &PtrAdd = cast<GPtrAdd>(MI);
Register LHS = PtrAdd.getBaseReg();
Register RHS = PtrAdd.getOffsetReg();
MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
- if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {
- int64_t Cst;
+ if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
+ APInt Cst;
if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
- NewCst = Cst + *RHSCst;
+ auto DstTy = MRI.getType(PtrAdd.getReg(0));
+ // G_INTTOPTR uses zero-extension
+ NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
+ NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
return true;
}
}
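The switch to APInt above makes the width handling explicit: the G_INTTOPTR source is zero-extended to the destination width, while the G_PTR_ADD offset is sign-extended before the add. A minimal standalone sketch of that arithmetic with plain integers (illustration only, not part of the patch; the 32-bit source and 64-bit pointer widths are just an example):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t IntToPtrSrc = 0x80000000u;                // i32 constant fed to G_INTTOPTR
    int64_t PtrAddOffset = -16;                        // signed G_PTR_ADD offset
    uint64_t Ptr = static_cast<uint64_t>(IntToPtrSrc); // zero-extended to pointer width
    uint64_t Folded = Ptr + static_cast<uint64_t>(PtrAddOffset); // offset sign-extended, then added
    assert(Folded == 0x7FFFFFF0u);
    // Sign-extending the G_INTTOPTR source instead would have produced
    // 0xFFFFFFFF7FFFFFF0 -- the distinction the APInt-based fold now encodes.
    return 0;
  }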
@@ -2043,7 +2060,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
}
void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
- int64_t &NewCst) {
+ APInt &NewCst) {
auto &PtrAdd = cast<GPtrAdd>(MI);
Register Dst = PtrAdd.getReg(0);
@@ -3875,39 +3892,48 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
LLT Ty = MRI.getType(Dst);
unsigned BitWidth = Ty.getScalarSizeInBits();
- Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
unsigned FshOpc = 0;
- // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
- if (mi_match(
- Dst, MRI,
- // m_GOr() handles the commuted version as well.
- m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
- m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
- m_Reg(LShrAmt)))))) {
+ // Match (or (shl ...), (lshr ...)).
+ if (!mi_match(Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
+ return false;
+
+ // Given constants C0 and C1 such that C0 + C1 is bit-width:
+ // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
+ // TODO: Match constant splat.
+ int64_t CstShlAmt, CstLShrAmt;
+ if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
+ mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
+ CstShlAmt + CstLShrAmt == BitWidth) {
+ FshOpc = TargetOpcode::G_FSHR;
+ Amt = LShrAmt;
+
+ } else if (mi_match(LShrAmt, MRI,
+ m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
+ ShlAmt == Amt) {
+ // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
FshOpc = TargetOpcode::G_FSHL;
- // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
- } else if (mi_match(Dst, MRI,
- m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
- m_GShl(m_Reg(ShlSrc),
- m_GSub(m_SpecificICstOrSplat(BitWidth),
- m_Reg(ShlAmt)))))) {
+ } else if (mi_match(ShlAmt, MRI,
+ m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
+ LShrAmt == Amt) {
+ // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
FshOpc = TargetOpcode::G_FSHR;
} else {
return false;
}
- if (ShlAmt != LShrAmt)
- return false;
-
- LLT AmtTy = MRI.getType(ShlAmt);
+ LLT AmtTy = MRI.getType(Amt);
if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
- B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
};
return true;
}
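Both accepted shift-amount forms above reduce to the same funnel-shift identities. A small self-contained check of those identities on 32-bit values (illustration only, not part of the patch; fshl32/fshr32 are local stand-ins for the generic G_FSHL/G_FSHR semantics):

  #include <cassert>
  #include <cstdint>

  static uint32_t fshl32(uint32_t X, uint32_t Y, unsigned Amt) {
    Amt %= 32;
    return Amt ? (X << Amt) | (Y >> (32 - Amt)) : X;
  }

  static uint32_t fshr32(uint32_t X, uint32_t Y, unsigned Amt) {
    Amt %= 32;
    return Amt ? (X << (32 - Amt)) | (Y >> Amt) : Y;
  }

  int main() {
    uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
    // Constant case: C0 + C1 == bit-width, so the or-of-shifts is a funnel shift.
    unsigned C0 = 13, C1 = 32 - C0;
    assert(((X << C0) | (Y >> C1)) == fshr32(X, Y, C1));
    assert(((X << C0) | (Y >> C1)) == fshl32(X, Y, C0));
    // Variable case: (x << amt) | (y >> (bw - amt)) == fshl(x, y, amt) for 0 < amt < bw.
    for (unsigned Amt = 1; Amt < 32; ++Amt)
      assert(((X << Amt) | (Y >> (32 - Amt))) == fshl32(X, Y, Amt));
    return 0;
  }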
@@ -4127,8 +4153,9 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
assert(MI.getOpcode() == TargetOpcode::G_AND);
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
- if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
- TargetOpcode::G_UBFX, Ty, Ty))
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
+ TargetOpcode::G_UBFX, Ty, ExtractTy))
return false;
int64_t AndImm, LSBImm;
@@ -4148,7 +4175,6 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
if (static_cast<uint64_t>(LSBImm) >= Size)
return false;
- LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
MatchInfo = [=](MachineIRBuilder &B) {
auto WidthCst = B.buildConstant(ExtractTy, Width);
@@ -4214,8 +4240,9 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
const Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
- if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
- TargetOpcode::G_UBFX, Ty, Ty))
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
+ TargetOpcode::G_UBFX, Ty, ExtractTy))
return false;
// Try to match shr (and x, c1), c2
@@ -4249,8 +4276,8 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
return false;
MatchInfo = [=](MachineIRBuilder &B) {
- auto WidthCst = B.buildConstant(Ty, Width);
- auto PosCst = B.buildConstant(Ty, Pos);
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
};
return true;
@@ -4850,37 +4877,39 @@ bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
return false;
- MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
- MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
unsigned PreferredFusedOpcode =
HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
- isContractableFMul(*RHS, AllowFusionGlobally)) {
- if (hasMoreUses(*LHS, *RHS, MRI))
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
std::swap(LHS, RHS);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (isContractableFMul(*LHS, AllowFusionGlobally) &&
- (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) {
+ if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
- {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(),
- RHS->getOperand(0).getReg()});
+ {LHS.MI->getOperand(1).getReg(),
+ LHS.MI->getOperand(2).getReg(), RHS.Reg});
};
return true;
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- if (isContractableFMul(*RHS, AllowFusionGlobally) &&
- (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) {
+ if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
- {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(),
- LHS->getOperand(0).getReg()});
+ {RHS.MI->getOperand(1).getReg(),
+ RHS.MI->getOperand(2).getReg(), LHS.Reg});
};
return true;
}
@@ -4897,8 +4926,10 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
return false;
const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
- MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
- MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
unsigned PreferredFusedOpcode =
@@ -4906,42 +4937,38 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
- isContractableFMul(*RHS, AllowFusionGlobally)) {
- if (hasMoreUses(*LHS, *RHS, MRI))
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
std::swap(LHS, RHS);
}
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
MachineInstr *FpExtSrc;
- if (mi_match(LHS->getOperand(0).getReg(), MRI,
- m_GFPExt(m_MInstr(FpExtSrc))) &&
+ if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
- B.buildInstr(
- PreferredFusedOpcode, {MI.getOperand(0).getReg()},
- {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
};
return true;
}
// fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
// Note: Commutes FADD operands.
- if (mi_match(RHS->getOperand(0).getReg(), MRI,
- m_GFPExt(m_MInstr(FpExtSrc))) &&
+ if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
- B.buildInstr(
- PreferredFusedOpcode, {MI.getOperand(0).getReg()},
- {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
};
return true;
}
@@ -4957,8 +4984,10 @@ bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
return false;
- MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
- MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
unsigned PreferredFusedOpcode =
@@ -4966,31 +4995,31 @@ bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
- isContractableFMul(*RHS, AllowFusionGlobally)) {
- if (hasMoreUses(*LHS, *RHS, MRI))
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
std::swap(LHS, RHS);
}
MachineInstr *FMA = nullptr;
Register Z;
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
- if (LHS->getOpcode() == PreferredFusedOpcode &&
- (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() ==
+ if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
TargetOpcode::G_FMUL) &&
- MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) &&
- MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) {
- FMA = LHS;
- Z = RHS->getOperand(0).getReg();
+ MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
+ FMA = LHS.MI;
+ Z = RHS.Reg;
}
// fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
- else if (RHS->getOpcode() == PreferredFusedOpcode &&
- (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() ==
+ else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
TargetOpcode::G_FMUL) &&
- MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) &&
- MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) {
- Z = LHS->getOperand(0).getReg();
- FMA = RHS;
+ MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
+ Z = LHS.Reg;
+ FMA = RHS.MI;
}
if (FMA) {
@@ -5025,17 +5054,19 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
- MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
- MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
unsigned PreferredFusedOpcode =
HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
- isContractableFMul(*RHS, AllowFusionGlobally)) {
- if (hasMoreUses(*LHS, *RHS, MRI))
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
std::swap(LHS, RHS);
}
@@ -5054,16 +5085,17 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
MachineInstr *FMulMI, *FMAMI;
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
- if (LHS->getOpcode() == PreferredFusedOpcode &&
- mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS.MI->getOperand(3).getReg(), MRI,
+ m_GFPExt(m_MInstr(FMulMI))) &&
isContractableFMul(*FMulMI, AllowFusionGlobally) &&
TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
MRI.getType(FMulMI->getOperand(0).getReg()))) {
MatchInfo = [=](MachineIRBuilder &B) {
buildMatchInfo(FMulMI->getOperand(1).getReg(),
- FMulMI->getOperand(2).getReg(),
- RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(),
- LHS->getOperand(2).getReg(), B);
+ FMulMI->getOperand(2).getReg(), RHS.Reg,
+ LHS.MI->getOperand(1).getReg(),
+ LHS.MI->getOperand(2).getReg(), B);
};
return true;
}
@@ -5073,7 +5105,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
FMAMI->getOpcode() == PreferredFusedOpcode) {
MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
@@ -5085,8 +5117,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
X = B.buildFPExt(DstType, X).getReg(0);
Y = B.buildFPExt(DstType, Y).getReg(0);
buildMatchInfo(FMulMI->getOperand(1).getReg(),
- FMulMI->getOperand(2).getReg(),
- RHS->getOperand(0).getReg(), X, Y, B);
+ FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
};
return true;
@@ -5095,16 +5126,17 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
// fold (fadd z, (fma x, y, (fpext (fmul u, v)))
// -> (fma x, y, (fma (fpext u), (fpext v), z))
- if (RHS->getOpcode() == PreferredFusedOpcode &&
- mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS.MI->getOperand(3).getReg(), MRI,
+ m_GFPExt(m_MInstr(FMulMI))) &&
isContractableFMul(*FMulMI, AllowFusionGlobally) &&
TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
MRI.getType(FMulMI->getOperand(0).getReg()))) {
MatchInfo = [=](MachineIRBuilder &B) {
buildMatchInfo(FMulMI->getOperand(1).getReg(),
- FMulMI->getOperand(2).getReg(),
- LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(),
- RHS->getOperand(2).getReg(), B);
+ FMulMI->getOperand(2).getReg(), LHS.Reg,
+ RHS.MI->getOperand(1).getReg(),
+ RHS.MI->getOperand(2).getReg(), B);
};
return true;
}
@@ -5114,7 +5146,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
FMAMI->getOpcode() == PreferredFusedOpcode) {
MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
@@ -5126,8 +5158,7 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
X = B.buildFPExt(DstType, X).getReg(0);
Y = B.buildFPExt(DstType, Y).getReg(0);
buildMatchInfo(FMulMI->getOperand(1).getReg(),
- FMulMI->getOperand(2).getReg(),
- LHS->getOperand(0).getReg(), X, Y, B);
+ FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
};
return true;
}
@@ -5144,16 +5175,18 @@ bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
return false;
- MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
- MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
int FirstMulHasFewerUses = true;
- if (isContractableFMul(*LHS, AllowFusionGlobally) &&
- isContractableFMul(*RHS, AllowFusionGlobally) &&
- hasMoreUses(*LHS, *RHS, MRI))
+ if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
+ hasMoreUses(*LHS.MI, *RHS.MI, MRI))
FirstMulHasFewerUses = false;
unsigned PreferredFusedOpcode =
@@ -5161,24 +5194,24 @@ bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
// fold (fsub (fmul x, y), z) -> (fma x, y, -z)
if (FirstMulHasFewerUses &&
- (isContractableFMul(*LHS, AllowFusionGlobally) &&
- (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) {
+ (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
- Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0);
- B.buildInstr(
- PreferredFusedOpcode, {MI.getOperand(0).getReg()},
- {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ});
+ Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS.MI->getOperand(1).getReg(),
+ LHS.MI->getOperand(2).getReg(), NegZ});
};
return true;
}
// fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
- else if ((isContractableFMul(*RHS, AllowFusionGlobally) &&
- (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) {
+ else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
- Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0);
- B.buildInstr(
- PreferredFusedOpcode, {MI.getOperand(0).getReg()},
- {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()});
+ Register NegY =
+ B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
};
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 306af808659a..64c2f0d5f8e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -37,6 +37,11 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
switch (MI->getOpcode()) {
case TargetOpcode::COPY:
return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
+ case TargetOpcode::G_ASSERT_ALIGN: {
+ // TODO: Min with source
+ int64_t LogAlign = MI->getOperand(2).getImm();
+ return Align(1ull << LogAlign);
+ }
case TargetOpcode::G_FRAME_INDEX: {
int FrameIdx = MI->getOperand(1).getIndex();
return MF.getFrameInfo().getObjectAlign(FrameIdx);
@@ -466,6 +471,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
+ case TargetOpcode::G_ASSERT_ALIGN: {
+ int64_t LogOfAlign = MI.getOperand(2).getImm();
+ if (LogOfAlign == 0)
+ break;
+
+ // TODO: Should use maximum with source
+ // If a node is guaranteed to be aligned, set low zero bits accordingly as
+ // well as clearing one bits.
+ Known.Zero.setLowBits(LogOfAlign);
+ Known.One.clearLowBits(LogOfAlign);
+ break;
+ }
case TargetOpcode::G_MERGE_VALUES: {
unsigned NumOps = MI.getNumOperands();
unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
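Both new G_ASSERT_ALIGN cases above encode the same fact: a value aligned to 2^LogOfAlign has its low LogOfAlign bits known to be zero. A tiny standalone illustration of that bit-level claim (not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    unsigned LogOfAlign = 4;                              // i.e. 16-byte alignment
    uint64_t LowBitsKnownZero = (1ull << LogOfAlign) - 1; // mask of bits forced to zero
    uint64_t SomeAlignedAddr = 0x1000 + 3 * 16;           // any multiple of 16
    assert((SomeAlignedAddr & LowBitsKnownZero) == 0);
    return 0;
  }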
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 4ae427484945..e5f95ca5aa73 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -297,10 +297,8 @@ bool InlineAsmLowering::lowerInlineAsm(
GISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
- if (OpInfo.Type == InlineAsm::isInput ||
- (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
-
- OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo++));
+ if (OpInfo.hasArg()) {
+ OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo));
if (isa<BasicBlock>(OpInfo.CallOperandVal)) {
LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n");
@@ -312,10 +310,8 @@ bool InlineAsmLowering::lowerInlineAsm(
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (OpInfo.isIndirect) {
- PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
- if (!PtrTy)
- report_fatal_error("Indirect operand for inline asm not a pointer!");
- OpTy = PtrTy->getElementType();
+ OpTy = Call.getAttributes().getParamElementType(ArgNo);
+ assert(OpTy && "Indirect operand must have elementtype attribute");
}
// FIXME: Support aggregate input operands
@@ -327,7 +323,7 @@ bool InlineAsmLowering::lowerInlineAsm(
OpInfo.ConstraintVT =
TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT();
-
+ ++ArgNo;
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
@@ -627,7 +623,8 @@ bool InlineAsmLowering::lowerInlineAsm(
Register SrcReg = OpInfo.Regs[0];
unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
- if (MRI->getType(ResRegs[i]).getSizeInBits() < SrcSize) {
+ LLT ResTy = MRI->getType(ResRegs[i]);
+ if (ResTy.isScalar() && ResTy.getSizeInBits() < SrcSize) {
// First copy the non-typed virtual register into a generic virtual
// register
Register Tmp1Reg =
@@ -635,9 +632,14 @@ bool InlineAsmLowering::lowerInlineAsm(
MIRBuilder.buildCopy(Tmp1Reg, SrcReg);
// Need to truncate the result of the register
MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg);
- } else {
+ } else if (ResTy.getSizeInBits() == SrcSize) {
MIRBuilder.buildCopy(ResRegs[i], SrcReg);
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled output operand with "
+ "mismatched register size\n");
+ return false;
}
+
break;
}
case TargetLowering::C_Immediate:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index b10c9272a508..2bb5addefe48 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -71,9 +71,10 @@ InstructionSelect::InstructionSelect()
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+
if (OptLevel != CodeGenOpt::None) {
- AU.addRequired<GISelKnownBitsAnalysis>();
- AU.addPreserved<GISelKnownBitsAnalysis>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
@@ -97,9 +98,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
: MF.getTarget().getOptLevel();
- GISelKnownBits *KB = nullptr;
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
if (OptLevel != CodeGenOpt::None) {
- KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI && PSI->hasProfileSummary())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index dc5a4d8f85aa..1d0c106fd5db 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
- : Renderers(MaxRenderers), MIs() {}
+ : Renderers(MaxRenderers) {}
InstructionSelector::InstructionSelector() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e8a8efd5dad4..37bc8a65dc7c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -564,7 +564,7 @@ static bool isLibCallInTailPosition(MachineInstr &MI,
// the return. Ignore NoAlias and NonNull because they don't affect the
// call sequence.
AttributeList CallerAttrs = F.getAttributes();
- if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
.removeAttribute(Attribute::NoAlias)
.removeAttribute(Attribute::NonNull)
.hasAttributes())
@@ -1677,7 +1677,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
// Widen SrcTy to WideTy. This does not affect the result, but since the
// user requested this size, it is probably better handled than SrcTy and
- // should reduce the total number of legalization artifacts
+ // should reduce the total number of legalization artifacts.
if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
SrcTy = WideTy;
SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
@@ -3655,7 +3655,6 @@ static bool hasSameNumEltsOnAllVectorOperands(
if (!Ty.isVector()) {
if (!is_contained(NonVecOpIndices, OpIdx))
return false;
- is_contained(NonVecOpIndices, OpIdx);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index a1acc4195840..328a278f3d68 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -124,14 +124,13 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedInstrs.insert(LocalizedMI);
MachineInstr &UseMI = *MOUse.getParent();
if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
- InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+ InsertMBB->insert(UseMI, LocalizedMI);
else
InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
LocalizedMI);
// Set a new register for the definition.
- Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
- MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+ Register NewReg = MRI->cloneVirtualRegister(Reg);
LocalizedMI->getOperand(0).setReg(NewReg);
NewVRegIt =
MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
@@ -174,9 +173,10 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
while (II != MBB.end() && !Users.count(&*II))
++II;
- LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
- << "\n");
assert(II != MBB.end() && "Didn't find the user in the MBB");
+ LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II
+ << '\n');
+
MI->removeFromParent();
MBB.insert(II, MI);
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 391251886fbb..c6720568b362 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -282,18 +282,6 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
return buildInstr(TargetOpcode::COPY, Res, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildAssertSExt(const DstOp &Res,
- const SrcOp &Op,
- unsigned Size) {
- return buildInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op).addImm(Size);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildAssertZExt(const DstOp &Res,
- const SrcOp &Op,
- unsigned Size) {
- return buildInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op).addImm(Size);
-}
-
MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
const ConstantInt &Val) {
LLT Ty = Res.getLLTTy(*getMRI());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 937d94764be1..01af6bb51bb7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -626,7 +626,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
if (isPreISelGenericOptimizationHint(Opc)) {
assert((Opc == TargetOpcode::G_ASSERT_ZEXT ||
- Opc == TargetOpcode::G_ASSERT_SEXT) &&
+ Opc == TargetOpcode::G_ASSERT_SEXT ||
+ Opc == TargetOpcode::G_ASSERT_ALIGN) &&
"Unexpected hint opcode!");
// The only correct mapping for these is to always use the source register
// bank.
@@ -856,7 +857,7 @@ void RegBankSelect::RepairingPlacement::addInsertPoint(
RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
bool Before)
- : InsertPoint(), Instr(Instr), Before(Before) {
+ : Instr(Instr), Before(Before) {
// Since we do not support splitting, we do not need to update
// liveness and such, so do not do anything with P.
assert((!Before || !Instr.isPHI()) &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 4981a537dc7c..544af9a2954f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -592,17 +592,17 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
-Optional<MachineInstr *>
-llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI,
- MachineIRBuilder &MIB) {
- auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
- if (!SrcVec1)
- return None;
+Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
if (!SrcVec2)
- return None;
+ return Register();
+
+ auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ if (!SrcVec1)
+ return Register();
const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
@@ -611,14 +611,14 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
SrcVec2->getSourceReg(Idx), MRI);
if (!MaybeCst)
- return None;
+ return Register();
auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
FoldedElements.emplace_back(FoldedCstReg);
}
// Create the new vector constant.
auto CstVec =
MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
- return &*CstVec;
+ return CstVec.getReg(0);
}
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
@@ -704,8 +704,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
const TargetInstrInfo &TII,
MCRegister PhysReg,
const TargetRegisterClass &RC,
- LLT RegTy) {
- DebugLoc DL; // FIXME: Is no location the right choice?
+ const DebugLoc &DL, LLT RegTy) {
MachineBasicBlock &EntryMBB = MF.front();
MachineRegisterInfo &MRI = MF.getRegInfo();
Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 9fabcfb1f326..2ee9379cb286 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -185,7 +185,7 @@ class Polynomial {
APInt A;
public:
- Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() {
+ Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) {
IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
if (Ty) {
ErrorMSBs = 0;
@@ -195,12 +195,12 @@ public:
}
Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
- : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(A) {}
+ : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {}
Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
- : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(BitWidth, A) {}
+ : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {}
- Polynomial() : ErrorMSBs((unsigned)-1), V(NULL), B(), A() {}
+ Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {}
/// Increment and clamp the number of undefined bits.
void incErrorMSBs(unsigned amt) {
@@ -677,7 +677,7 @@ public:
FixedVectorType *const VTy;
VectorInfo(FixedVectorType *VTy)
- : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) {
+ : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) {
EI = new ElementInfo[VTy->getNumElements()];
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index e97dcca201e8..8a190e769941 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -251,9 +251,10 @@ public:
/// creates DBG_VALUEs and puts them in #Transfers, then prepares the other
/// object fields to track variable locations as we step through the block.
/// FIXME: could just examine mloctracker instead of passing in \p mlocs?
- void loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs,
- SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
- unsigned NumLocs) {
+ void
+ loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs,
+ const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
+ unsigned NumLocs) {
ActiveMLocs.clear();
ActiveVLocs.clear();
VarLocs.clear();
@@ -272,7 +273,7 @@ public:
};
// Map of the preferred location for each value.
- std::map<ValueIDNum, LocIdx> ValueToLoc;
+ DenseMap<ValueIDNum, LocIdx> ValueToLoc;
ActiveMLocs.reserve(VLocs.size());
ActiveVLocs.reserve(VLocs.size());
@@ -283,6 +284,11 @@ public:
LocIdx Idx = Location.Idx;
ValueIDNum &VNum = MLocs[Idx.asU64()];
VarLocs.push_back(VNum);
+
+ // Short-circuit unnecessary preferred location update.
+ if (VLocs.empty())
+ continue;
+
auto it = ValueToLoc.find(VNum);
// In order of preference, pick:
// * Callee saved registers,
@@ -298,7 +304,7 @@ public:
}
// Now map variables to their picked LocIdxes.
- for (auto Var : VLocs) {
+ for (const auto &Var : VLocs) {
if (Var.second.Kind == DbgValue::Const) {
PendingDbgValues.push_back(
emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties));
@@ -413,7 +419,8 @@ public:
return Reg != SP && Reg != FP;
}
- bool recoverAsEntryValue(const DebugVariable &Var, DbgValueProperties &Prop,
+ bool recoverAsEntryValue(const DebugVariable &Var,
+ const DbgValueProperties &Prop,
const ValueIDNum &Num) {
// Is this variable location a candidate to be an entry value. First,
// should we be trying this at all?
@@ -2799,31 +2806,28 @@ void InstrRefBasedLDV::emitLocations(
}
}
- // We have to insert DBG_VALUEs in a consistent order, otherwise they appeaer
- // in DWARF in different orders. Use the order that they appear when walking
- // through each block / each instruction, stored in AllVarsNumbering.
- auto OrderDbgValues = [&](const MachineInstr *A,
- const MachineInstr *B) -> bool {
- DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(),
- A->getDebugLoc()->getInlinedAt());
- DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(),
- B->getDebugLoc()->getInlinedAt());
- return AllVarsNumbering.find(VarA)->second <
- AllVarsNumbering.find(VarB)->second;
- };
-
// Go through all the transfers recorded in the TransferTracker -- this is
// both the live-ins to a block, and any movements of values that happen
// in the middle.
- for (auto &P : TTracker->Transfers) {
- // Sort them according to appearance order.
- llvm::sort(P.Insts, OrderDbgValues);
+ for (const auto &P : TTracker->Transfers) {
+ // We have to insert DBG_VALUEs in a consistent order, otherwise they
+ // appear in DWARF in different orders. Use the order that they appear
+ // when walking through each block / each instruction, stored in
+ // AllVarsNumbering.
+ SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
+ for (MachineInstr *MI : P.Insts) {
+ DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
+ MI->getDebugLoc()->getInlinedAt());
+ Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
+ }
+ llvm::sort(Insts,
+ [](const auto &A, const auto &B) { return A.first < B.first; });
+
// Insert either before or after the designated point...
if (P.MBB) {
MachineBasicBlock &MBB = *P.MBB;
- for (auto *MI : P.Insts) {
- MBB.insert(P.Pos, MI);
- }
+ for (const auto &Pair : Insts)
+ MBB.insert(P.Pos, Pair.second);
} else {
// Terminators, like tail calls, can clobber things. Don't try and place
// transfers after them.
@@ -2831,9 +2835,8 @@ void InstrRefBasedLDV::emitLocations(
continue;
MachineBasicBlock &MBB = *P.Pos->getParent();
- for (auto *MI : P.Insts) {
- MBB.insertAfterBundle(P.Pos, MI);
- }
+ for (const auto &Pair : Insts)
+ MBB.insertAfterBundle(P.Pos, Pair.second);
}
}
}
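The rewrite above computes each DBG_VALUE's AllVarsNumbering key once and sorts (key, instruction) pairs, instead of performing two map lookups inside the comparator on every comparison. A minimal standalone sketch of that decorate-then-sort pattern, with std::sort standing in for llvm::sort (illustration only):

  #include <algorithm>
  #include <cassert>
  #include <string>
  #include <utility>
  #include <vector>

  int main() {
    // Pretend these strings are the DBG_VALUEs and the unsigned is the
    // precomputed AllVarsNumbering order for each one.
    std::vector<std::pair<unsigned, std::string>> Insts = {
        {3, "DBG_VALUE !c"}, {1, "DBG_VALUE !a"}, {2, "DBG_VALUE !b"}};
    std::sort(Insts.begin(), Insts.end(),
              [](const auto &A, const auto &B) { return A.first < B.first; });
    assert(Insts.front().second == "DBG_VALUE !a");
    assert(Insts.back().second == "DBG_VALUE !c");
    return 0;
  }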
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 789205e61cdb..9e9c0ce394fd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -494,7 +494,7 @@ public:
return StackIdxesToPos.find(Idx)->second;
}
- unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+ unsigned getNumLocs() const { return LocIdxToIDNum.size(); }
/// Reset all locations to contain a PHI value at the designated block. Used
/// sometimes for actual PHI values, other times to indicate the block entry
@@ -516,7 +516,7 @@ public:
}
/// Wipe any unnecessary location records after traversing a block.
- void reset(void) {
+ void reset() {
// We could reset all the location values too; however either loadFromArray
// or setMPhis should be called before this object is re-used. Just
// clear Masks, they're definitely not needed.
@@ -525,7 +525,7 @@ public:
/// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
/// the information in this pass uninterpretable.
- void clear(void) {
+ void clear() {
reset();
LocIDToLocIdx.clear();
LocIdxToLocID.clear();
@@ -1082,7 +1082,9 @@ template <> struct DenseMapInfo<ValueIDNum> {
return ValueIDNum::TombstoneValue;
}
- static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); }
+ static unsigned getHashValue(const ValueIDNum &Val) {
+ return hash_value(Val.asU64());
+ }
static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
return A == B;
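The getHashValue change matters because the packed 64-bit payload of a ValueIDNum can differ only in its upper half; a hash that simply truncated to unsigned would send all such keys to the same bucket. A standalone sketch of the collision (illustration only; the real fix routes the full value through llvm::hash_value):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t A = (1ull << 40) | 7;  // two keys that differ only above bit 31
    uint64_t B = (2ull << 40) | 7;
    assert(A != B);
    assert(static_cast<unsigned>(A) == static_cast<unsigned>(B)); // truncating hash collides
    return 0;
  }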
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 691977dc34e6..8f697611a82c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -40,6 +40,10 @@ static cl::opt<bool>
"normal DBG_VALUE inputs"),
cl::init(false));
+static cl::opt<cl::boolOrDefault> ValueTrackingVariableLocations(
+ "experimental-debug-variable-locations",
+ cl::desc("Use experimental new value-tracking variable locations"));
+
// Options to prevent pathological compile-time behavior. If InputBBLimit and
// InputDbgValueLimit are both exceeded, range extension is disabled.
static cl::opt<unsigned> InputBBLimit(
@@ -117,3 +121,8 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit,
InputDbgValueLimit);
}
+
+bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
+ // Enable if explicitly requested on command line.
+ return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index a5936c8a96f0..8f0b2ec3e1fc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/ADT/Triple.h"
namespace llvm {
@@ -35,6 +36,9 @@ public:
// Factory functions for LiveDebugValues implementations.
extern LDVImpl *makeVarLocBasedLiveDebugValues();
extern LDVImpl *makeInstrRefBasedLiveDebugValues();
+
+extern bool debuginfoShouldUseDebugInstrRef(const Triple &T);
+
} // namespace llvm
#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index e6661e5135c3..6d806135240e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -152,7 +152,7 @@ public:
}
}
- DbgVariableValue() : LocNoCount(0), WasIndirect(0), WasList(0) {}
+ DbgVariableValue() : LocNoCount(0), WasIndirect(false), WasList(false) {}
DbgVariableValue(const DbgVariableValue &Other)
: LocNoCount(Other.LocNoCount), WasIndirect(Other.getWasIndirect()),
WasList(Other.getWasList()), Expression(Other.getExpression()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 2f97386b6d18..9571afa434c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -827,6 +827,8 @@ CancelKill:
MachineBasicBlock*
LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ assert(!LI.empty() && "LiveInterval is empty.");
+
// A local live range must be fully contained inside the block, meaning it is
// defined and killed at instructions, not at block boundaries. It is not
// live in or out of any block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 1a04e1ca56a9..6477965bdc21 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -875,11 +875,11 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
// N.B: Multiple lists of successors and liveins are allowed and they're
// merged into one.
// Example:
- // liveins: %edi
- // liveins: %esi
+ // liveins: $edi
+ // liveins: $esi
//
// is equivalent to
- // liveins: %edi, %esi
+ // liveins: $edi, $esi
bool ExplicitSuccessors = false;
while (true) {
if (Token.is(MIToken::kw_successors)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index d0323eaf3d78..f144639770bc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -182,8 +182,7 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
StringRef Filename, LLVMContext &Context,
std::function<void(Function &)> Callback)
- : SM(),
- Context(Context),
+ : Context(Context),
In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))
->getBuffer(),
nullptr, handleYAMLDiag, this),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
new file mode 100644
index 000000000000..a74c57690640
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -0,0 +1,862 @@
+//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the ML eviction advisor and reward injection pass
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocGreedy.h"
+#include "RegAllocScore.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/config.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <array>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ml-regalloc"
+
+// Generated header in release (AOT) mode
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
+#include "RegallocEvictModel.h"
+#endif
+
+// Options that only make sense in development mode
+#ifdef LLVM_HAVE_TF_API
+static cl::opt<std::string> TrainingLog(
+ "regalloc-training-log", cl::Hidden,
+ cl::desc("Training log for the register allocator eviction model"));
+
+static cl::opt<std::string> ModelUnderTraining(
+ "regalloc-model", cl::Hidden,
+ cl::desc("The model being trained for register allocation eviction"));
+
+#endif // #ifdef LLVM_HAVE_TF_API
+
+/// The score injection pass.
+/// This pass calculates the score for a function and inserts it in the log, but
+/// this happens only in development mode. It's a no-op otherwise.
+namespace llvm {
+class RegAllocScoring : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RegAllocScoring() : MachineFunctionPass(ID) {
+ initializeRegAllocScoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ ~RegAllocScoring() override = default;
+
+ StringRef getPassName() const override {
+ return "Register Allocation Pass Scoring";
+ }
+
+ /// RegAllocReward analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// Performs this pass
+ bool runOnMachineFunction(MachineFunction &) override;
+};
+
+char RegAllocScoring::ID = 0;
+FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }
+
+} // namespace llvm
+
+INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
+ "Register Allocation Scoring Pass", false, false)
+
+// ===================================
+// Common ML Advisor declarations
+// ===================================
+namespace {
+// This is the maximum number of interfering ranges. That's the number of
+// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
+// For X86, that's 32.
+// TODO: find a way to get this, statically, in a programmatic way.
+static const int64_t MaxInterferences = 32;
+
+// Logically, we can think of the feature set given to the evaluator as a 2D
+// matrix. The rows are the features (see next). The columns correspond to the
+// interferences. We treat the candidate virt reg as an 'interference', too, as
+// its feature set is the same as that of the interfering ranges. So we'll have
+// MaxInterferences + 1 columns and by convention, we will use the last column
+// for the virt reg seeking allocation.
+static const int64_t CandidateVirtRegPos = MaxInterferences;
+static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
+
+// Most features are as described above, so we'll reuse this vector in defining
+// them.
+static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
+
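For orientation, a sketch of the layout just described (illustrative only, not part of the patch): with the constants above, every per-live-range feature tensor has shape {1, 33}.

  // columns 0 .. 31 : positions in AllocationOrder (features of the ranges
  //                   interfering at that phys reg)
  // column  32      : CandidateVirtRegPos, the virt reg seeking allocation
  // e.g. the 'mask' tensor is 33 int64_t values; mask[Pos] == 1 means column
  // Pos is a legal eviction choice.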
+// --------------
+// Features table
+// --------------
+// For each interfering live range (incl. the candidate) we collect a number of
+// features. However, because the features are of different types (and because
+// of ML best practices), we organize the tensors per feature, not per
+// candidate. Each such tensor has a scalar value corresponding to the
+// interfering live range at that position, in the order in AllocationOrder.
+// The last position corresponds to the virt reg seeking allocation.
+// Exception to all that is the progression feature, which is just a scalar (see
+// its documentation for details).
+// Note on naming: the "_by_max" features are normalized using the largest value
+// of that tensor, as observed in the current decision making stage (i.e. for
+// the current call to the advisor's tryFindEvictionCandidate).
+//
+// The feature list format: type, name, shape, documentation.
+// Note: we can really just use int64 and float, hence the modeling of some
+// bools as int64 values.
+#define RA_EVICT_FEATURES_LIST(M) \
+ M(int64_t, mask, PerLiveRangeShape, \
+ "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \
+ "it " \
+ "can't be evicted)") \
+ M(int64_t, is_free, PerLiveRangeShape, \
+ "boolean values, 1 if this phys reg is actually free (no interferences)") \
+ M(float, nr_urgent, PerLiveRangeShape, \
+ "number of 'urgent' intervals, normalized. Urgent are those that are OK " \
+ "to break cascades") \
+ M(float, nr_broken_hints, PerLiveRangeShape, \
+ "if this position were evicted, how many broken hints would there be") \
+ M(int64_t, is_hint, PerLiveRangeShape, \
+ "is this a preferred phys reg for the candidate") \
+ M(int64_t, is_local, PerLiveRangeShape, \
+ "is this live range local to a basic block") \
+ M(float, nr_rematerializable, PerLiveRangeShape, \
+ "nr rematerializable ranges") \
+ M(float, nr_defs_and_uses, PerLiveRangeShape, \
+ "bb freq - weighed nr defs and uses") \
+ M(float, weighed_reads_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of reads, normalized") \
+ M(float, weighed_writes_by_max, PerLiveRangeShape, \
+ "bb feq - weighed nr of writes, normalized") \
+ M(float, weighed_read_writes_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of uses that are both read and writes, normalized") \
+ M(float, weighed_indvars_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of uses that are indvars, normalized") \
+ M(float, hint_weights_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of uses that are hints, normalized") \
+ M(float, start_bb_freq_by_max, PerLiveRangeShape, \
+ "the freq in the start block, normalized") \
+ M(float, end_bb_freq_by_max, PerLiveRangeShape, \
+ "freq of end block, normalized") \
+ M(float, hottest_bb_freq_by_max, PerLiveRangeShape, \
+ "hottest BB freq, normalized") \
+ M(float, liverange_size, PerLiveRangeShape, \
+ "size (instr index diff) of the LR") \
+ M(float, use_def_density, PerLiveRangeShape, \
+ "the max weight, as computed by the manual heuristic") \
+ M(int64_t, max_stage, PerLiveRangeShape, \
+ "largest stage of an interval in this LR") \
+ M(int64_t, min_stage, PerLiveRangeShape, \
+ "lowest stage of an interval in this LR") \
+ M(float, progress, {1}, "ratio of current queue size to initial size")
+
+// The model learns to pick one of the mask == 1 interferences. This is the name
+// of the output tensor.
+// The contract with the model is that the output is guaranteed to be a
+// mask == 1 position.
+// Using a macro here to avoid 'not used' warnings (and keep cond compilation to
+// a minimum)
+#define DecisionName "index_to_evict"
+
+// Named features index.
+enum FeatureIDs {
+#define _FEATURE_IDX(_, name, __, ___) name,
+ RA_EVICT_FEATURES_LIST(_FEATURE_IDX)
+#undef _FEATURE_IDX
+ FeatureCount
+};
+
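Expanded, the enum above is simply the feature names in declaration order followed by a count (sketch; first and last entries shown):

  enum FeatureIDs {
    mask, is_free, nr_urgent, nr_broken_hints, /* ... */ max_stage, min_stage,
    progress, FeatureCount
  };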
+// The ML advisor will typically have a sparse input to the evaluator, because
+// various phys regs won't be available. It's easier (maintenance-wise) to
+// bulk-reset the state of the evaluator each time we are about to use it again.
+template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
+ size_t Ret = sizeof(T);
+ for (const auto V : Shape)
+ Ret *= V;
+ return Ret;
+}
+
+void resetInputs(MLModelRunner &Runner) {
+#define _RESET(TYPE, NAME, SHAPE, __) \
+ std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \
+ getTotalSize<TYPE>(SHAPE));
+ RA_EVICT_FEATURES_LIST(_RESET)
+#undef _RESET
+}
+
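For one feature, the _RESET expansion looks roughly like this (sketch; with the shapes above, getTotalSize<int64_t>(PerLiveRangeShape) is sizeof(int64_t) * 1 * 33 = 264 bytes):

  std::memset(Runner.getTensorUntyped(FeatureIDs::mask), 0,
              getTotalSize<int64_t>(PerLiveRangeShape));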
+using CandidateRegList =
+ std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
+using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
+
+/// The ML evictor (commonalities between release and development mode)
+class MLEvictAdvisor : public RegAllocEvictionAdvisor {
+public:
+ MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
+ const MachineLoopInfo &Loops);
+
+protected:
+ const RegAllocEvictionAdvisor &getDefaultAdvisor() const {
+ return static_cast<const RegAllocEvictionAdvisor &>(DefaultAdvisor);
+ }
+
+  // The assumption is that if the Runner could not be constructed, we emitted
+  // an error, and we shouldn't be asking for it here.
+ const MLModelRunner &getRunner() const { return *Runner; }
+
+ /// This just calls Evaluate on the Runner, but in the development mode case,
+ /// if we're just capturing the log of the default advisor, it needs to call
+ /// the latter instead, so we need to pass all the necessary parameters for
+ /// it. In the development case, it will also log.
+ virtual int64_t tryFindEvictionCandidatePosition(
+ LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const;
+
+ /// Load the features of the given VirtReg (allocated or not) at column Pos,
+  /// but if it can't be evicted, return false instead.
+ bool
+ loadInterferenceFeatures(LiveInterval &VirtReg, MCRegister PhysReg,
+ bool IsHint, const SmallVirtRegSet &FixedRegisters,
+ std::array<float, FeatureIDs::FeatureCount> &Largest,
+ size_t Pos) const;
+
+private:
+ static float getInitialQueueSize(const MachineFunction &MF);
+
+ MCRegister tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const override;
+
+ void extractFeatures(const SmallVectorImpl<LiveInterval *> &Intervals,
+ std::array<float, FeatureIDs::FeatureCount> &Largest,
+ size_t Pos, int64_t IsHint, int64_t LocalIntfsCount,
+ float NrUrgent) const;
+
+  // We haven't learned this yet, so we always delegate to the default.
+ bool canEvictHintInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const override {
+ return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
+ FixedRegisters);
+ }
+
+ // Hold on to a default advisor for:
+ // 1) the implementation of canEvictHintInterference, because we didn't learn
+ // that nuance yet;
+ // 2) for bootstrapping (logging) in the development mode case.
+ const DefaultEvictionAdvisor DefaultAdvisor;
+ MLModelRunner *const Runner;
+ const MachineBlockFrequencyInfo &MBFI;
+ const MachineLoopInfo &Loops;
+
+ // Indices of those features we don't want to normalize.
+ // This could be static and shared, but its initialization is non-trivial.
+ std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
+ const float InitialQSize;
+};
+
+// ===================================
+// Release (AOT) - specifics
+// ===================================
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
+const std::array<std::string, FeatureIDs::FeatureCount> FeatureNames{
+#define _GETNAME(_, NAME, __, ___) #NAME,
+ RA_EVICT_FEATURES_LIST(_GETNAME)
+#undef _GETNAME
+};
+class ReleaseModeEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ ReleaseModeEvictionAdvisorAnalysis()
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {}
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Release;
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ if (!Runner)
+ Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>(
+ MF.getFunction().getContext(), FeatureNames, DecisionName);
+ return std::make_unique<MLEvictAdvisor>(
+ MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<MachineLoopInfo>());
+ }
+ std::unique_ptr<ReleaseModeModelRunner<RegallocEvictModel>> Runner;
+};
+#endif
+
+// ===================================
+// Development mode-specifics
+// ===================================
+//
+// Features we log
+#ifdef LLVM_HAVE_TF_API
+#define _DECL_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+ {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
+};
+#undef _DECL_FEATURES
+static const TensorSpec Output =
+ TensorSpec::createSpec<int64_t>(DecisionName, {1});
+static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
+
+// Features we bind on the model. The tensor names have a prefix, and we also
+// need to include some tensors that are expected to be present by the training
+// algo.
+// TODO: can we just get rid of these?
+#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
+
+static const std::vector<TensorSpec> TrainingInputFeatures{
+ {RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})}};
+#undef _DECL_TRAIN_FEATURES
+
+class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
+public:
+ DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner,
+ const MachineBlockFrequencyInfo &MBFI,
+ const MachineLoopInfo &Loops, Logger *Log)
+ : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}
+
+private:
+ int64_t tryFindEvictionCandidatePosition(
+ LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const override;
+
+ Logger *const Log;
+};
+
+class DevelopmentModeEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ DevelopmentModeEvictionAdvisorAnalysis()
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {}
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Development;
+ }
+
+  /// Get the logger for the given function, or nullptr if we didn't collect
+ /// one. This is used to inject the score by the RegAllocScoring pass.
+ Logger *getLogger(const MachineFunction &MF) const {
+ auto I = LogMap.find(MF.getName());
+ if (I == LogMap.end())
+ return nullptr;
+ return I->second.get();
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ // Save all the logs (when requested).
+ bool doFinalization(Module &M) override {
+ if (TrainingLog.empty())
+ return false;
+ std::error_code EC;
+ auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
+ if (EC) {
+ M.getContext().emitError(EC.message() + ":" + TrainingLog);
+ return false;
+ }
+ Logger::flushLogs(*OS, LogMap);
+ return false;
+ }
+
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ if (ModelUnderTraining.empty() && TrainingLog.empty()) {
+ Ctx.emitError("Regalloc development mode should be requested with at "
+ "least logging enabled and/or a training model");
+ return nullptr;
+ }
+ if (!Runner) {
+ if (ModelUnderTraining.empty())
+ Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
+ else
+ Runner = ModelUnderTrainingRunner::createAndEnsureValid(
+ Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
+ if (!Runner) {
+ Ctx.emitError("Regalloc: could not set up the model runner");
+ return nullptr;
+ }
+ }
+
+ Logger *Log = nullptr;
+ if (!TrainingLog.empty()) {
+ std::vector<LoggedFeatureSpec> LFS;
+ for (const auto &FS : InputFeatures)
+ LFS.push_back({FS, None});
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
+ if (MUTR->outputLoggedFeatureSpecs().size() > 1)
+ append_range(LFS, drop_begin(MUTR->outputLoggedFeatureSpecs()));
+ // We always log the output; in particular, if we're not evaluating, we
+ // don't have an output spec json file. That's why we handle the
+ // 'normal' output separately.
+ LFS.push_back({Output, None});
+ auto I = LogMap.insert(std::make_pair(
+ MF.getFunction().getName(),
+ std::make_unique<Logger>(LFS, Reward, /*IncludeReward*/ true)));
+ assert(I.second);
+ Log = I.first->second.get();
+ }
+ return std::make_unique<DevelopmentModeEvictAdvisor>(
+ MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<MachineLoopInfo>(), Log);
+ }
+
+ std::unique_ptr<MLModelRunner> Runner;
+ StringMap<std::unique_ptr<Logger>> LogMap;
+};
+#endif //#ifdef LLVM_HAVE_TF_API
+} // namespace
+
+float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
+ auto &MRI = MF.getRegInfo();
+ float Ret = 0.0;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ ++Ret;
+ }
+ return Ret;
+}
+
+MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner,
+ const MachineBlockFrequencyInfo &MBFI,
+ const MachineLoopInfo &Loops)
+ : RegAllocEvictionAdvisor(MF, RA), DefaultAdvisor(MF, RA),
+ Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
+ InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
+ assert(this->Runner);
+ DoNotNormalize.set(FeatureIDs::mask);
+ DoNotNormalize.set(FeatureIDs::is_free);
+ DoNotNormalize.set(FeatureIDs::is_hint);
+ DoNotNormalize.set(FeatureIDs::is_local);
+ DoNotNormalize.set(FeatureIDs::min_stage);
+ DoNotNormalize.set(FeatureIDs::max_stage);
+ DoNotNormalize.set(FeatureIDs::progress);
+}
+
+int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
+ LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
+ const SmallVirtRegSet &) const {
+ int64_t Ret = Runner->evaluate<int64_t>();
+ assert(Ret >= 0);
+ assert(Ret <= CandidateVirtRegPos);
+ return Ret;
+}
+
+bool MLEvictAdvisor::loadInterferenceFeatures(
+ LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest,
+ size_t Pos) const {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
+ // leave unavailable
+ return false;
+ }
+
+ const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
+ int64_t LocalIntfs = 0;
+ float NrUrgent = 0.0f;
+
+ // The cascade tracking is the same as in the default advisor
+ unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
+
+ SmallVector<LiveInterval *, MaxInterferences> InterferingIntervals;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    // Unlike the default heuristic, we don't make any assumptions about
+ // what having more than 10 results in the query may mean.
+ const auto &IFIntervals = Q.interferingVRegs();
+ if (IFIntervals.empty() && InterferingIntervals.empty())
+ continue;
+ InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
+ for (LiveInterval *Intf : reverse(IFIntervals)) {
+ assert(Register::isVirtualRegister(Intf->reg()) &&
+ "Only expecting virtual register interference from query");
+ // This is the same set of legality checks as in the default case: don't
+ // try to evict fixed regs or 'done' ones. Also don't break cascades,
+ // except in the urgent case, with the same nuances used in the default
+ // heuristic.
+ // We could try sharing this between the advisors, but it may end up
+ // more complex than it is right now.
+ if (FixedRegisters.count(Intf->reg()))
+ return false;
+ if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
+ return false;
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ ++NrUrgent;
+ }
+
+ LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg)));
+ }
+ }
+  // OK, so if we made it this far, this LR is an eviction candidate; load its
+ // features.
+ extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
+ NrUrgent);
+ return true;
+}
+
+MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
+ if (!MaybeOrderLimit)
+ return MCRegister::NoRegister;
+ unsigned OrderLimit = *MaybeOrderLimit;
+
+  // The heuristic sets initial costs such that, if CostPerUseLimit is
+  // max<uint8_t>, then any of the costs of the legally-evictable intervals
+  // would be lower. When that happens, one of those will be selected.
+  // Therefore, we allow the candidate to be selected, unless the candidate is
+  // unspillable, in which case it would be incorrect not to find a register
+  // for it.
+ const bool MustFindEviction =
+ (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));
+ // Number of available candidates - if 0, no need to continue.
+ size_t Available = 0;
+ // Make sure we don't have leftover partial state from an attempt where we had
+ // no available candidates and bailed out early.
+ resetInputs(*Runner);
+
+ // Track the index->register mapping because AllocationOrder doesn't do that
+ // and we'd have to scan it.
+  // Also track their mask, for asserts/debugging.
+ CandidateRegList Regs;
+ Regs.fill({0, false});
+
+ // Track the largest value of features seen during this eviction session. We
+ // only normalize (some of) the float features, but it's just simpler to
+ // dimension 'Largest' to all the features, especially since we have the
+ // 'DoNotNormalize' list.
+ FeaturesListNormalizer Largest;
+ Largest.fill(0.0);
+
+  // Same overall idea as in the default eviction policy - we visit the values
+  // of AllocationOrder one at a time. If a register is not legally available,
+  // we mask off the corresponding feature column (== do nothing, because we
+  // already reset all the features to 0).
+ // Use Pos to capture the column we load features at - in AllocationOrder
+ // order.
+ size_t Pos = 0;
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I, ++Pos) {
+ MCRegister PhysReg = *I;
+ Regs[Pos] = std::make_pair(PhysReg, true);
+ assert(PhysReg);
+ if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
+ Regs[Pos].second = false;
+ continue;
+ }
+ if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
+ Largest, Pos)) {
+ ++Available;
+ Regs[Pos].second = true;
+ }
+ }
+ if (Available == 0) {
+ // Nothing to decide, nothing to learn.
+ assert(!MustFindEviction);
+ return MCRegister::NoRegister;
+ }
+ // If we must find eviction, the candidate should be masked out of the
+ // decision making process.
+ Regs[CandidateVirtRegPos].second = !MustFindEviction;
+ if (!MustFindEviction)
+ extractFeatures(SmallVector<LiveInterval *, 1>(1, &VirtReg), Largest,
+ CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
+ /*NrUrgent*/ 0.0);
+ assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
+ "nothing to allocate initially.");
+ // Normalize the features.
+ for (auto &V : Largest)
+ V = V ? V : 1.0;
+ for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount;
+ ++FeatureIndex) {
+ if (DoNotNormalize.test(FeatureIndex))
+ continue;
+ for (size_t Pos = 0; Pos < NumberOfInterferences; ++Pos) {
+ Runner->getTensor<float>(FeatureIndex)[Pos] /= Largest[FeatureIndex];
+ }
+ }
+ *Runner->getTensor<float>(FeatureIDs::progress) =
+ static_cast<float>(RA.getQueueSize()) / InitialQSize;
+
+ // Get a decision.
+ size_t CandidatePos = tryFindEvictionCandidatePosition(
+ VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
+ // The contract with the ML side is that CandidatePos is mask == 1 (i.e.
+ // Regs[CandidatePos].second)
+ assert(Regs[CandidatePos].second);
+ if (CandidatePos == CandidateVirtRegPos) {
+ assert(!MustFindEviction);
+ return MCRegister::NoRegister;
+ }
+ return Regs[CandidatePos].first;
+}
+
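A worked illustration of the normalization above (values invented): if the weighed_reads_by_max row holds {2.0, 0.5, 4.0, 0.0, ...}, Largest for that feature is 4.0 and the row becomes {0.5, 0.125, 1.0, 0.0, ...}; features whose Largest stayed 0 are divided by 1.0 instead, and those in DoNotNormalize (mask, is_hint, the stages, progress, ...) keep their raw values.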
+// Overall, this currently mimics what we do for weight calculation, but instead
+// of accumulating the various features, we keep them separate.
+void MLEvictAdvisor::extractFeatures(
+ const SmallVectorImpl<LiveInterval *> &Intervals,
+ std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
+ int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
+ int64_t NrDefsAndUses = 0;
+ int64_t NrBrokenHints = 0;
+ float R = 0;
+ float W = 0;
+ float RW = 0;
+ float IndVarUpdates = 0;
+ float HintWeights = 0.0;
+ float StartBBFreq = 0.0;
+ float EndBBFreq = 0.0;
+ float HottestBlockFreq = 0.0;
+ int32_t NrRematerializable = 0;
+ float TotalWeight = 0.0;
+
+ SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex();
+ SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex();
+ int64_t MaxStage = 0;
+ int64_t MinStage =
+ Intervals.empty() ? 0 : std::numeric_limits<int64_t>::max();
+
+ for (const auto *L : Intervals) {
+ const LiveInterval &LI = *L;
+ MaxStage = std::max<int64_t>(
+ MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
+ MinStage = std::min<int64_t>(
+ MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
+
+ TotalWeight = std::max(TotalWeight, LI.weight());
+
+ if (LI.beginIndex() < StartSI)
+ StartSI = LI.beginIndex();
+
+ if (LI.endIndex() > EndSI)
+ EndSI = LI.endIndex();
+
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
+
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ I = MRI->reg_instr_nodbg_begin(LI.reg()),
+ E = MRI->reg_instr_nodbg_end();
+ I != E;) {
+ MachineInstr *MI = &*(I++);
+
+ ++NrDefsAndUses;
+ if (!Visited.insert(MI).second)
+ continue;
+
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
+ continue;
+
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+
+ float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
+ if (Freq > HottestBlockFreq)
+ HottestBlockFreq = Freq;
+ R += (Reads && !Writes) * Freq;
+ W += (!Reads && Writes) * Freq;
+ RW += (Reads && Writes) * Freq;
+
+ auto *MBB = MI->getParent();
+ auto *Loop = Loops.getLoopFor(MBB);
+ bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
+
+ if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
+ IndVarUpdates += Freq;
+
+ if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
+ HintWeights += Freq;
+ }
+ NrRematerializable += VirtRegAuxInfo::isRematerializable(
+ LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
+ }
+ size_t Size = 0;
+ if (!Intervals.empty()) {
+ StartBBFreq =
+ MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(StartSI));
+ if (EndSI >= LIS->getSlotIndexes()->getLastIndex())
+ EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex();
+ EndBBFreq =
+ MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(EndSI));
+ Size = StartSI.distance(EndSI);
+ }
+ // Set the features at the column 'Pos'.
+#define SET(ID, TYPE, VAL) \
+ do { \
+ Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL); \
+ if (!DoNotNormalize.test(FeatureIDs::ID)) \
+ Largest[FeatureIDs::ID] = \
+ std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL)); \
+ } while (false)
+ SET(mask, int64_t, 1);
+ SET(is_free, int64_t, Intervals.empty());
+ SET(nr_urgent, float, NrUrgent);
+ SET(nr_broken_hints, float, NrBrokenHints);
+ SET(is_hint, int64_t, IsHint);
+ SET(is_local, int64_t, LocalIntfsCount);
+ SET(nr_rematerializable, float, NrRematerializable);
+ SET(nr_defs_and_uses, float, NrDefsAndUses);
+ SET(weighed_reads_by_max, float, R);
+ SET(weighed_writes_by_max, float, W);
+ SET(weighed_read_writes_by_max, float, RW);
+ SET(weighed_indvars_by_max, float, IndVarUpdates);
+ SET(hint_weights_by_max, float, HintWeights);
+ SET(start_bb_freq_by_max, float, StartBBFreq);
+ SET(end_bb_freq_by_max, float, EndBBFreq);
+ SET(hottest_bb_freq_by_max, float, HottestBlockFreq);
+ SET(liverange_size, float, Size);
+ SET(use_def_density, float, TotalWeight);
+ SET(max_stage, int64_t, MaxStage);
+ SET(min_stage, int64_t, MinStage);
+#undef SET
+}
+
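For reference, one SET expansion (sketch, for the nr_urgent feature) is:

  Runner->getTensor<float>(FeatureIDs::nr_urgent)[Pos] = static_cast<float>(NrUrgent);
  if (!DoNotNormalize.test(FeatureIDs::nr_urgent))
    Largest[FeatureIDs::nr_urgent] =
        std::max(Largest[FeatureIDs::nr_urgent], static_cast<float>(NrUrgent));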
+// Development mode-specific implementations
+#ifdef LLVM_HAVE_TF_API
+RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
+ return new DevelopmentModeEvictionAdvisorAnalysis();
+}
+
+int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
+ LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ int64_t Ret = 0;
+ if (isa<ModelUnderTrainingRunner>(getRunner())) {
+ Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
+ VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
+ } else {
+ MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
+ VirtReg, Order, CostPerUseLimit, FixedRegisters);
+    // Find the index of the selected PhysReg. We need it for logging; otherwise
+    // these are wasted cycles (but so would be starting development mode without
+    // a model or logging).
+ if (!PhysReg)
+ Ret = CandidateVirtRegPos;
+ else
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit);
+ I != E; ++I, ++Ret)
+ if (*I == PhysReg)
+ break;
+ }
+ if (TrainingLog.empty())
+ return Ret;
+ size_t CurrentFeature = 0;
+ for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) {
+ Log->logSpecifiedTensorValue(
+ CurrentFeature, reinterpret_cast<const char *>(
+ getRunner().getTensorUntyped(CurrentFeature)));
+ }
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
+ for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size();
+ ++I, ++CurrentFeature)
+ Log->logSpecifiedTensorValue(
+ CurrentFeature,
+ reinterpret_cast<const char *>(
+ MUTR->lastEvaluationResult()->getUntypedTensorValue(I)));
+ // The output is right after the features and the extra outputs
+ Log->logInt64Value(CurrentFeature, &Ret);
+ return Ret;
+}
+
+bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
+ if (auto *DevModeAnalysis = dyn_cast<DevelopmentModeEvictionAdvisorAnalysis>(
+ &getAnalysis<RegAllocEvictionAdvisorAnalysis>()))
+ if (auto *Log = DevModeAnalysis->getLogger(MF))
+ Log->logFloatFinalReward(static_cast<float>(
+ calculateRegAllocScore(
+ MF, getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<AAResultsWrapperPass>().getAAResults())
+ .getScore()));
+
+ return false;
+}
+#endif // #ifdef LLVM_HAVE_TF_API
+
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
+RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
+ return new ReleaseModeEvictionAdvisorAnalysis();
+}
+#endif
+
+// In all cases except development mode, we don't need scoring.
+#if !defined(LLVM_HAVE_TF_API)
+bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; }
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 692587cd58fa..c93ffaabf74c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -96,6 +96,12 @@ static cl::opt<unsigned> AlignAllNonFallThruBlocks(
"format (e.g 4 means align on 16B boundaries)."),
cl::init(0), cl::Hidden);
+static cl::opt<unsigned> MaxBytesForAlignmentOverride(
+ "max-bytes-for-alignment",
+ cl::desc("Forces the maximum bytes allowed to be emitted when padding for "
+ "alignment"),
+ cl::init(0), cl::Hidden);
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned> ExitBlockBias(
"block-placement-exit-block-bias",
@@ -2929,10 +2935,21 @@ void MachineBlockPlacement::alignBlocks() {
MachineBasicBlock *LayoutPred =
&*std::prev(MachineFunction::iterator(ChainBB));
+ auto DetermineMaxAlignmentPadding = [&]() {
+ // Set the maximum bytes allowed to be emitted for alignment.
+ unsigned MaxBytes;
+ if (MaxBytesForAlignmentOverride.getNumOccurrences() > 0)
+ MaxBytes = MaxBytesForAlignmentOverride;
+ else
+ MaxBytes = TLI->getMaxPermittedBytesForAlignment(ChainBB);
+ ChainBB->setMaxBytesForAlignment(MaxBytes);
+ };
+
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
if (!LayoutPred->isSuccessor(ChainBB)) {
ChainBB->setAlignment(Align);
+ DetermineMaxAlignmentPadding();
continue;
}
@@ -2943,8 +2960,10 @@ void MachineBlockPlacement::alignBlocks() {
BranchProbability LayoutProb =
MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
- if (LayoutEdgeFreq <= (Freq * ColdProb))
+ if (LayoutEdgeFreq <= (Freq * ColdProb)) {
ChainBB->setAlignment(Align);
+ DetermineMaxAlignmentPadding();
+ }
}
}
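Illustrative use of the new flag (assuming the standard llc driver, which exposes hidden cl::opt options): llc -max-bytes-for-alignment=8 test.ll caps the padding emitted for any block alignment this pass sets at 8 bytes; without the flag, the limit comes from TLI->getMaxPermittedBytesForAlignment(ChainBB).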
@@ -3418,17 +3437,30 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
ComputedEdges.clear();
ChainAllocator.DestroyAll();
+ bool HasMaxBytesOverride =
+ MaxBytesForAlignmentOverride.getNumOccurrences() > 0;
+
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineBasicBlock &MBB : MF)
- MBB.setAlignment(Align(1ULL << AlignAllBlock));
+ for (MachineBasicBlock &MBB : MF) {
+ if (HasMaxBytesOverride)
+ MBB.setAlignment(Align(1ULL << AlignAllBlock),
+ MaxBytesForAlignmentOverride);
+ else
+ MBB.setAlignment(Align(1ULL << AlignAllBlock));
+ }
else if (AlignAllNonFallThruBlocks) {
// Align all of the blocks that have no fall-through predecessors to a
// specific alignment.
for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
auto LayoutPred = std::prev(MBI);
- if (!LayoutPred->isSuccessor(&*MBI))
- MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
+ if (!LayoutPred->isSuccessor(&*MBI)) {
+ if (HasMaxBytesOverride)
+ MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks),
+ MaxBytesForAlignmentOverride);
+ else
+ MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
+ }
}
}
if (ViewBlockLayoutWithBFI != GVDT_None &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index 6ddb1758719b..a39dc79baaa8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -29,9 +29,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier",
"Machine Dominance Frontier Construction", true, true)
-MachineDominanceFrontier::MachineDominanceFrontier()
- : MachineFunctionPass(ID),
- Base() {
+MachineDominanceFrontier::MachineDominanceFrontier() : MachineFunctionPass(ID) {
initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 81ed3d0e93ff..fd5ea5cad072 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -76,6 +76,8 @@
#include <utility>
#include <vector>
+#include "LiveDebugValues/LiveDebugValues.h"
+
using namespace llvm;
#define DEBUG_TYPE "codegen"
@@ -1238,7 +1240,7 @@ bool MachineFunction::useDebugInstrRef() const {
if (F.hasFnAttribute(Attribute::OptimizeNone))
return false;
- if (getTarget().Options.ValueTrackingVariableLocations)
+ if (llvm::debuginfoShouldUseDebugInstrRef(getTarget().getTargetTriple()))
return true;
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 6ca97031b92a..759cff179790 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -144,6 +144,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<Register, 8> UndefUseSet;
SmallVector<MachineOperand*, 4> Defs;
for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ // Debug instructions have no effects to track.
+ if (MII->isDebugInstr())
+ continue;
+
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 59fc23983d3d..5347a7b0d890 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -22,8 +22,7 @@
using namespace llvm;
DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
- StringRef MKey, const MachineInstr &MI)
- : Argument() {
+ StringRef MKey, const MachineInstr &MI) {
Key = std::string(MKey);
raw_string_ostream OS(Val);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 54c478645dcf..0dbbc218e946 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -796,9 +796,14 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (Reg == 0)
continue;
- // Don't handle physical register.
- if (Register::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg)) {
+ if (MO.isUse() &&
+ (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
+ continue;
+
+      // Don't handle non-constant and non-ignorable physical registers.
return false;
+ }
// Users for the defs are all dominated by SuccToSinkTo.
if (MO.isDef()) {
@@ -898,7 +903,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg))
+ if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
return nullptr;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index aaa6403cc978..f91a9d2c3a32 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1704,7 +1704,7 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// Peel out the prologs.
LS.reset();
for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
- LS[I] = 1;
+ LS[I] = true;
Prologs.push_back(peelKernel(LPD_Front));
LiveStages[Prologs.back()] = LS;
AvailableStages[Prologs.back()] = LS;
@@ -1752,7 +1752,7 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// Move stage one block at a time so that Phi nodes are updated correctly.
for (size_t K = Iteration; K > I; K--)
moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
- LS[Stage] = 1;
+ LS[Stage] = true;
}
LiveStages[Epilogs[I]] = LS;
AvailableStages[Epilogs[I]] = AS;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index 9ed3471c0fc9..db5217469fba 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/NonRelocatableStringpool.h"
+#include "llvm/ADT/STLExtras.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index e3eb3f825851..74b903f99284 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -97,7 +97,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
(void)Kind;
assert((Kind == objcarc::ARCInstKind::RetainRV ||
- Kind == objcarc::ARCInstKind::ClaimRV) &&
+ Kind == objcarc::ARCInstKind::UnsafeClaimRV) &&
"use expected to be the argument of operand bundle "
"\"clang.arc.attachedcall\"");
U.set(FCache.getCallee());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 9f1012c95964..87df7bb4a689 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocGreedy.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -42,6 +43,9 @@ static cl::opt<bool> EnableLocalReassignment(
cl::init(false));
#define DEBUG_TYPE "regalloc"
+#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
+#define LLVM_HAVE_TF_AOT
+#endif
char RegAllocEvictionAdvisorAnalysis::ID = 0;
INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict",
@@ -62,12 +66,8 @@ public:
private:
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
- LiveIntervals *LIS, VirtRegMap *VRM,
- const RegisterClassInfo &RegClassInfo,
- ExtraRegInfo *ExtraInfo) override {
- return std::make_unique<DefaultEvictionAdvisor>(MF, Matrix, LIS, VRM,
- RegClassInfo, ExtraInfo);
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ return std::make_unique<DefaultEvictionAdvisor>(MF, RA);
}
bool doInitialization(Module &M) override {
if (NotAsRequested)
@@ -86,10 +86,14 @@ template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false);
break;
case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development:
- // TODO(mtrofin): add implementation
+#if defined(LLVM_HAVE_TF_API)
+ Ret = createDevelopmentModeAdvisor();
+#endif
break;
case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release:
- // TODO(mtrofin): add implementation
+#if defined(LLVM_HAVE_TF_AOT)
+ Ret = createReleaseModeAdvisor();
+#endif
break;
}
if (Ret)
@@ -109,13 +113,12 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
llvm_unreachable("Unknown advisor kind");
}
-RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(
- const MachineFunction &MF, LiveRegMatrix *Matrix, LiveIntervals *LIS,
- VirtRegMap *VRM, const RegisterClassInfo &RegClassInfo,
- ExtraRegInfo *ExtraInfo)
- : MF(MF), Matrix(Matrix), LIS(LIS), VRM(VRM), MRI(&VRM->getRegInfo()),
- TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RegClassInfo),
- RegCosts(TRI->getRegisterCosts(MF)), ExtraInfo(ExtraInfo),
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
+ const RAGreedy &RA)
+ : MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()),
+ LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
+ MRI(&VRM->getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()),
+ RegClassInfo(RA.getRegClassInfo()), RegCosts(TRI->getRegisterCosts(MF)),
EnableLocalReassign(EnableLocalReassignment ||
MF.getSubtarget().enableRALocalReassignment(
MF.getTarget().getOptLevel())) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index debb75ed5020..33e03aed81a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -87,87 +87,9 @@ struct EvictionCost {
}
};
-/// Track allocation stage and eviction loop prevention during allocation.
-// TODO(mtrofin): Consider exposing RAGreedy in a header instead, and folding
-// this back into it.
-class ExtraRegInfo final {
- // RegInfo - Keep additional information about each live range.
- struct RegInfo {
- LiveRangeStage Stage = RS_New;
-
- // Cascade - Eviction loop prevention. See
- // canEvictInterferenceBasedOnCost().
- unsigned Cascade = 0;
-
- RegInfo() = default;
- };
-
- IndexedMap<RegInfo, VirtReg2IndexFunctor> Info;
- unsigned NextCascade = 1;
-
-public:
- ExtraRegInfo() = default;
- ExtraRegInfo(const ExtraRegInfo &) = delete;
-
- LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; }
-
- LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return getStage(VirtReg.reg());
- }
-
- void setStage(Register Reg, LiveRangeStage Stage) {
- Info.grow(Reg.id());
- Info[Reg].Stage = Stage;
- }
-
- void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
- setStage(VirtReg.reg(), Stage);
- }
-
- /// Return the current stage of the register, if present, otherwise initialize
- /// it and return that.
- LiveRangeStage getOrInitStage(Register Reg) {
- Info.grow(Reg.id());
- return getStage(Reg);
- }
-
- unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; }
-
- void setCascade(Register Reg, unsigned Cascade) {
- Info.grow(Reg.id());
- Info[Reg].Cascade = Cascade;
- }
-
- unsigned getOrAssignNewCascade(Register Reg) {
- unsigned Cascade = getCascade(Reg);
- if (!Cascade) {
- Cascade = NextCascade++;
- setCascade(Reg, Cascade);
- }
- return Cascade;
- }
-
- unsigned getCascadeOrCurrentNext(Register Reg) const {
- unsigned Cascade = getCascade(Reg);
- if (!Cascade)
- Cascade = NextCascade;
- return Cascade;
- }
-
- template <typename Iterator>
- void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
- for (; Begin != End; ++Begin) {
- Register Reg = *Begin;
- Info.grow(Reg.id());
- if (Info[Reg].Stage == RS_New)
- Info[Reg].Stage = NewStage;
- }
- }
- void LRE_DidCloneVirtReg(Register New, Register Old);
-};
-
/// Interface to the eviction advisor, which is responsible for making a
/// decision as to which live ranges should be evicted (if any).
+class RAGreedy;
class RegAllocEvictionAdvisor {
public:
RegAllocEvictionAdvisor(const RegAllocEvictionAdvisor &) = delete;
@@ -193,14 +115,23 @@ public:
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
protected:
- RegAllocEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
- LiveIntervals *LIS, VirtRegMap *VRM,
- const RegisterClassInfo &RegClassInfo,
- ExtraRegInfo *ExtraInfo);
+ RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
+  // Get the upper limit of elements in the given Order we need to analyze.
+  // TODO: this is a heuristic; we could consider learning it.
+ Optional<unsigned> getOrderLimit(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned CostPerUseLimit) const;
+
+ // Determine if it's worth trying to allocate this reg, given the
+ // CostPerUseLimit
+ // TODO: this is a heuristic component we could consider learning, too.
+ bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const;
+
const MachineFunction &MF;
+ const RAGreedy &RA;
LiveRegMatrix *const Matrix;
LiveIntervals *const LIS;
VirtRegMap *const VRM;
@@ -208,7 +139,6 @@ protected:
const TargetRegisterInfo *const TRI;
const RegisterClassInfo &RegClassInfo;
const ArrayRef<uint8_t> RegCosts;
- ExtraRegInfo *const ExtraInfo;
/// Run or not the local reassignment heuristic. This information is
/// obtained from the TargetSubtargetInfo.
@@ -243,19 +173,17 @@ public:
/// Get an advisor for the given context (i.e. machine function, etc)
virtual std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
- LiveIntervals *LIS, VirtRegMap *VRM,
- const RegisterClassInfo &RegClassInfo,
- ExtraRegInfo *ExtraInfo) = 0;
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
AdvisorMode getAdvisorMode() const { return Mode; }
-private:
+protected:
// This analysis preserves everything, and subclasses may have additional
// requirements.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
+private:
StringRef getPassName() const override;
const AdvisorMode Mode;
};
@@ -264,25 +192,16 @@ private:
/// an instance of the eviction advisor.
template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>();
-// TODO(mtrofin): implement these.
-#ifdef LLVM_HAVE_TF_AOT
RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor();
-#endif
-#ifdef LLVM_HAVE_TF_API
RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
-#endif
// TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation
// out of RegAllocGreedy.cpp
class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
public:
- DefaultEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
- LiveIntervals *LIS, VirtRegMap *VRM,
- const RegisterClassInfo &RegClassInfo,
- ExtraRegInfo *ExtraInfo)
- : RegAllocEvictionAdvisor(MF, Matrix, LIS, VRM, RegClassInfo, ExtraInfo) {
- }
+ DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA)
+ : RegAllocEvictionAdvisor(MF, RA) {}
private:
MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index ce3cf31dbd6b..6ea6dbcbbb74 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
@@ -135,362 +136,6 @@ static cl::opt<bool> ConsiderLocalIntervalCost(
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
-namespace {
-
-class RAGreedy : public MachineFunctionPass,
- public RegAllocBase,
- private LiveRangeEdit::Delegate {
- // Convenient shortcuts.
- using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
- using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
-
- // context
- MachineFunction *MF;
-
- // Shortcuts to some useful interface.
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- RegisterClassInfo RCI;
-
- // analyses
- SlotIndexes *Indexes;
- MachineBlockFrequencyInfo *MBFI;
- MachineDominatorTree *DomTree;
- MachineLoopInfo *Loops;
- MachineOptimizationRemarkEmitter *ORE;
- EdgeBundles *Bundles;
- SpillPlacement *SpillPlacer;
- LiveDebugVariables *DebugVars;
- AliasAnalysis *AA;
-
- // state
- std::unique_ptr<Spiller> SpillerInstance;
- PQueue Queue;
- std::unique_ptr<VirtRegAuxInfo> VRAI;
- Optional<ExtraRegInfo> ExtraInfo;
- std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor;
-
- // Enum CutOffStage to keep a track whether the register allocation failed
- // because of the cutoffs encountered in last chance recoloring.
- // Note: This is used as bitmask. New value should be next power of 2.
- enum CutOffStage {
- // No cutoffs encountered
- CO_None = 0,
-
- // lcr-max-depth cutoff encountered
- CO_Depth = 1,
-
- // lcr-max-interf cutoff encountered
- CO_Interf = 2
- };
-
- uint8_t CutOffInfo;
-
-#ifndef NDEBUG
- static const char *const StageName[];
-#endif
-
- /// EvictionTrack - Keeps track of past evictions in order to optimize region
- /// split decision.
- class EvictionTrack {
-
- public:
- using EvictorInfo =
- std::pair<Register /* evictor */, MCRegister /* physreg */>;
- using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;
-
- private:
- /// Each Vreg that has been evicted in the last stage of selectOrSplit will
- /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
- EvicteeInfo Evictees;
-
- public:
- /// Clear all eviction information.
- void clear() { Evictees.clear(); }
-
- /// Clear eviction information for the given evictee Vreg.
- /// E.g. when Vreg get's a new allocation, the old eviction info is no
- /// longer relevant.
- /// \param Evictee The evictee Vreg for whom we want to clear collected
- /// eviction info.
- void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }
-
- /// Track new eviction.
- /// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \param PhysReg The physical register Evictee was evicted from.
- /// \param Evictor The evictor Vreg that evicted Evictee.
- /// \param Evictee The evictee Vreg.
- void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
- Evictees[Evictee].first = Evictor;
- Evictees[Evictee].second = PhysReg;
- }
-
- /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
- /// \param Evictee The evictee vreg.
- /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
- /// nobody has evicted Evictee from PhysReg.
- EvictorInfo getEvictor(Register Evictee) {
- if (Evictees.count(Evictee)) {
- return Evictees[Evictee];
- }
-
- return EvictorInfo(0, 0);
- }
- };
-
- // Keeps track of past evictions in order to optimize region split decision.
- EvictionTrack LastEvicted;
-
- // splitting state.
- std::unique_ptr<SplitAnalysis> SA;
- std::unique_ptr<SplitEditor> SE;
-
- /// Cached per-block interference maps
- InterferenceCache IntfCache;
-
- /// All basic blocks where the current register has uses.
- SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
-
- /// Global live range splitting candidate info.
- struct GlobalSplitCandidate {
- // Register intended for assignment, or 0.
- MCRegister PhysReg;
-
- // SplitKit interval index for this candidate.
- unsigned IntvIdx;
-
- // Interference for PhysReg.
- InterferenceCache::Cursor Intf;
-
- // Bundles where this candidate should be live.
- BitVector LiveBundles;
- SmallVector<unsigned, 8> ActiveBlocks;
-
- void reset(InterferenceCache &Cache, MCRegister Reg) {
- PhysReg = Reg;
- IntvIdx = 0;
- Intf.setPhysReg(Cache, Reg);
- LiveBundles.clear();
- ActiveBlocks.clear();
- }
-
- // Set B[I] = C for every live bundle where B[I] was NoCand.
- unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
- unsigned Count = 0;
- for (unsigned I : LiveBundles.set_bits())
- if (B[I] == NoCand) {
- B[I] = C;
- Count++;
- }
- return Count;
- }
- };
-
- /// Candidate info for each PhysReg in AllocationOrder.
- /// This vector never shrinks, but grows to the size of the largest register
- /// class.
- SmallVector<GlobalSplitCandidate, 32> GlobalCand;
-
- enum : unsigned { NoCand = ~0u };
-
- /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
- /// NoCand which indicates the stack interval.
- SmallVector<unsigned, 32> BundleCand;
-
- /// Callee-save register cost, calculated once per machine function.
- BlockFrequency CSRCost;
-
- /// Enable or not the consideration of the cost of local intervals created
- /// by a split candidate when choosing the best split candidate.
- bool EnableAdvancedRASplitCost;
-
- /// Set of broken hints that may be reconciled later because of eviction.
- SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
-
- /// The register cost values. This list will be recreated for each Machine
- /// Function
- ArrayRef<uint8_t> RegCosts;
-
-public:
- RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
-
- /// Return the pass name.
- StringRef getPassName() const override { return "Greedy Register Allocator"; }
-
- /// RAGreedy analysis usage.
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void releaseMemory() override;
- Spiller &spiller() override { return *SpillerInstance; }
- void enqueueImpl(LiveInterval *LI) override;
- LiveInterval *dequeue() override;
- MCRegister selectOrSplit(LiveInterval &,
- SmallVectorImpl<Register> &) override;
- void aboutToRemoveInterval(LiveInterval &) override;
-
- /// Perform register allocation.
- bool runOnMachineFunction(MachineFunction &mf) override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoPHIs);
- }
-
- MachineFunctionProperties getClearedProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::IsSSA);
- }
-
- static char ID;
-
-private:
- MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned = 0);
-
- bool LRE_CanEraseVirtReg(Register) override;
- void LRE_WillShrinkVirtReg(Register) override;
- void LRE_DidCloneVirtReg(Register, Register) override;
- void enqueue(PQueue &CurQueue, LiveInterval *LI);
- LiveInterval *dequeue(PQueue &CurQueue);
-
- BlockFrequency calcSpillCost();
- bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
- bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
- bool growRegion(GlobalSplitCandidate &Cand);
- bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order);
- bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
- GlobalSplitCandidate &Cand, unsigned BBNumber,
- const AllocationOrder &Order);
- BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
- const AllocationOrder &Order,
- bool *CanCauseEvictionChain);
- bool calcCompactRegion(GlobalSplitCandidate&);
- void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
- void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
- bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
- MCRegister PhysReg, SlotIndex Start,
- SlotIndex End, EvictionCost &MaxCost) const;
- MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
- const LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictWeight) const;
- void evictInterference(LiveInterval &, MCRegister,
- SmallVectorImpl<Register> &);
- bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
- SmallLISet &RecoloringCandidates,
- const SmallVirtRegSet &FixedRegisters);
-
- MCRegister tryAssign(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&,
- const SmallVirtRegSet&);
- MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
- uint8_t, const SmallVirtRegSet &) const;
- MCRegister tryEvict(LiveInterval &, AllocationOrder &,
- SmallVectorImpl<Register> &, uint8_t,
- const SmallVirtRegSet &);
- MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
- SmallVectorImpl<Register> &);
- /// Calculate cost of region splitting.
- unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
- AllocationOrder &Order,
- BlockFrequency &BestCost,
- unsigned &NumCands, bool IgnoreCSR,
- bool *CanCauseEvictionChain = nullptr);
- /// Perform region splitting.
- unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
- bool HasCompact,
- SmallVectorImpl<Register> &NewVRegs);
- /// Check other options before using a callee-saved register for the first
- /// time.
- MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
- AllocationOrder &Order, MCRegister PhysReg,
- uint8_t &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs);
- void initializeCSRCost();
- unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&);
- unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&);
- unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&);
- unsigned trySplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&,
- const SmallVirtRegSet&);
- unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
- SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned);
- bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned);
- void tryHintRecoloring(LiveInterval &);
- void tryHintsRecoloring();
-
- /// Model the information carried by one end of a copy.
- struct HintInfo {
- /// The frequency of the copy.
- BlockFrequency Freq;
- /// The virtual register or physical register.
- Register Reg;
- /// Its currently assigned register.
- /// In case of a physical register Reg == PhysReg.
- MCRegister PhysReg;
-
- HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg)
- : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
- };
- using HintsInfo = SmallVector<HintInfo, 4>;
-
- BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
- void collectHintInfo(Register, HintsInfo &);
-
- /// Greedy RA statistic to remark.
- struct RAGreedyStats {
- unsigned Reloads = 0;
- unsigned FoldedReloads = 0;
- unsigned ZeroCostFoldedReloads = 0;
- unsigned Spills = 0;
- unsigned FoldedSpills = 0;
- unsigned Copies = 0;
- float ReloadsCost = 0.0f;
- float FoldedReloadsCost = 0.0f;
- float SpillsCost = 0.0f;
- float FoldedSpillsCost = 0.0f;
- float CopiesCost = 0.0f;
-
- bool isEmpty() {
- return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
- ZeroCostFoldedReloads || Copies);
- }
-
- void add(RAGreedyStats other) {
- Reloads += other.Reloads;
- FoldedReloads += other.FoldedReloads;
- ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
- Spills += other.Spills;
- FoldedSpills += other.FoldedSpills;
- Copies += other.Copies;
- ReloadsCost += other.ReloadsCost;
- FoldedReloadsCost += other.FoldedReloadsCost;
- SpillsCost += other.SpillsCost;
- FoldedSpillsCost += other.FoldedSpillsCost;
- CopiesCost += other.CopiesCost;
- }
-
- void report(MachineOptimizationRemarkMissed &R);
- };
-
- /// Compute statistic for a basic block.
- RAGreedyStats computeStats(MachineBasicBlock &MBB);
-
- /// Compute and report statistic through a remark.
- RAGreedyStats reportStats(MachineLoop *L);
-
- /// Report the statistic for each loop.
- void reportStats();
-};
-
-} // end anonymous namespace
-
char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;
@@ -613,7 +258,7 @@ void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}
-void ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
+void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
// Cloning a register we haven't even heard about yet? Just ignore it.
if (!Info.inBounds(Old))
return;
@@ -811,7 +456,7 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
LiveInterval &B,
bool BreaksHint) const {
- bool CanSplit = ExtraInfo->getStage(B) < RS_Spill;
+ bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
// Be fairly aggressive about following hints as long as the evictee can be
// split.
@@ -852,7 +497,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
return false;
- bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
+ bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
// Find VirtReg's cascade number. This will be unassigned if VirtReg was never
// involved in an eviction before. If a cascade number was assigned, deny
@@ -861,7 +506,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
//
// This works out so a register without a cascade number is allowed to evict
// anything, and it can be evicted by anything.
- unsigned Cascade = ExtraInfo->getCascadeOrCurrentNext(VirtReg.reg());
+ unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
EvictionCost Cost;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -883,7 +528,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
return false;
// Never evict spill products. They cannot split or spill.
- if (ExtraInfo->getStage(*Intf) == RS_Done)
+ if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
return false;
// Once a live range becomes small enough, it is urgent that we find a
// register for it. This is indicated by an infinite spill weight. These
@@ -898,7 +543,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
RegClassInfo.getNumAllocatableRegs(
MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = ExtraInfo->getCascade(Intf->reg());
+ unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
if (Cascade <= IntfCascade) {
if (!Urgent)
return false;
@@ -1069,28 +714,20 @@ bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
- // Keep track of the cheapest interference seen so far.
- EvictionCost BestCost;
- BestCost.setMax();
- MCRegister BestPhys;
+Optional<unsigned>
+RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned CostPerUseLimit) const {
unsigned OrderLimit = Order.getOrder().size();
- // When we are just looking for a reduced cost per use, don't break any
- // hints, and only evict smaller spill weights.
if (CostPerUseLimit < uint8_t(~0u)) {
- BestCost.BrokenHints = 0;
- BestCost.MaxWeight = VirtReg.weight();
-
// Check if any registers in RC are below CostPerUseLimit.
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
uint8_t MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
<< MinCost << ", no cheaper registers to be found.\n");
- return 0;
+ return None;
}
// It is normal for register classes to have a long tail of registers with
@@ -1101,24 +738,50 @@ MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
<< " regs.\n");
}
}
+ return OrderLimit;
+}
+
+bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
+ MCRegister PhysReg) const {
+ if (RegCosts[PhysReg] >= CostPerUseLimit)
+ return false;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
+ LLVM_DEBUG(
+ dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
+ << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+ << '\n');
+ return false;
+ }
+ return true;
+}
+
+MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost;
+ BestCost.setMax();
+ MCRegister BestPhys;
+ auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
+ if (!MaybeOrderLimit)
+ return MCRegister::NoRegister;
+ unsigned OrderLimit = *MaybeOrderLimit;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < uint8_t(~0u)) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight();
+ }
for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
++I) {
MCRegister PhysReg = *I;
assert(PhysReg);
- if (RegCosts[PhysReg] >= CostPerUseLimit)
- continue;
- // The first use of a callee-saved register in a function has cost 1.
- // Don't start using a CSR when the CostPerUseLimit is low.
- if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
- LLVM_DEBUG(
- dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
- << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
- << '\n');
- continue;
- }
-
- if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) ||
+ !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
FixedRegisters))
continue;
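
For reference, the loop above keeps the physical register whose interference is cheapest to evict, where "cheapest" is the lexicographic comparison seen in BestCost: first the number of register hints the eviction would break, then the maximum spill weight among the interfering ranges. A minimal standalone sketch of that ordering (illustrative only; EvictionCostSketch is not LLVM's EvictionCost type, just an analog of its BrokenHints/MaxWeight comparison):

#include <cassert>
#include <limits>

struct EvictionCostSketch {
  unsigned BrokenHints = 0; // hints broken by this eviction
  float MaxWeight = 0.0f;   // heaviest spill weight among evicted ranges
  void setMax() {
    BrokenHints = std::numeric_limits<unsigned>::max();
    MaxWeight = std::numeric_limits<float>::max();
  }
  bool operator<(const EvictionCostSketch &O) const {
    // Prefer evictions that break fewer hints; break ties on spill weight.
    if (BrokenHints != O.BrokenHints)
      return BrokenHints < O.BrokenHints;
    return MaxWeight < O.MaxWeight;
  }
};

int main() {
  EvictionCostSketch Best;
  Best.setMax(); // start at "infinitely expensive", as BestCost.setMax() does
  EvictionCostSketch A; A.BrokenHints = 0; A.MaxWeight = 2.5f;
  EvictionCostSketch B; B.BrokenHints = 1; B.MaxWeight = 0.1f;
  assert(A < Best && A < B); // fewer broken hints wins despite the higher weight
  Best = A;                  // the loop would record A's register as BestPhys
  return 0;
}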
@@ -3269,8 +2932,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
ExtraInfo.emplace();
- EvictAdvisor = getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(
- *MF, Matrix, LIS, VRM, RegClassInfo, &*ExtraInfo);
+ EvictAdvisor =
+ getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
new file mode 100644
index 000000000000..e9a5fe635f26
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -0,0 +1,507 @@
+//==- RegAllocGreedy.h ------- greedy register allocator ----------*-C++-*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
+#define LLVM_CODEGEN_REGALLOCGREEDY_H_
+
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "RegAllocEvictionAdvisor.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <queue>
+#include <tuple>
+#include <utility>
+
+namespace llvm {
+class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+ // Interface to eviction advisers
+public:
+ /// Track allocation stage and eviction loop prevention during allocation.
+ class ExtraRegInfo final {
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage = RS_New;
+
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
+ unsigned Cascade = 0;
+
+ RegInfo() = default;
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> Info;
+ unsigned NextCascade = 1;
+
+ public:
+ ExtraRegInfo() = default;
+ ExtraRegInfo(const ExtraRegInfo &) = delete;
+
+ LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; }
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ Info.grow(Reg.id());
+ Info[Reg].Stage = Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register, if present, otherwise
+ /// initialize it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ Info.grow(Reg.id());
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
+ Info.grow(Reg.id());
+ Info[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
+ }
+
+ template <typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ for (; Begin != End; ++Begin) {
+ Register Reg = *Begin;
+ Info.grow(Reg.id());
+ if (Info[Reg].Stage == RS_New)
+ Info[Reg].Stage = NewStage;
+ }
+ }
+ void LRE_DidCloneVirtReg(Register New, Register Old);
+ };
+
+ LiveRegMatrix *getInterferenceMatrix() const { return Matrix; }
+ LiveIntervals *getLiveIntervals() const { return LIS; }
+ VirtRegMap *getVirtRegMap() const { return VRM; }
+ const RegisterClassInfo &getRegClassInfo() const { return RegClassInfo; }
+ const ExtraRegInfo &getExtraInfo() const { return *ExtraInfo; }
+ size_t getQueueSize() const { return Queue.size(); }
+ // end (interface to eviction advisers)
+
+private:
+ // Convenient shortcuts.
+ using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
+ using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
+
+ // context
+ MachineFunction *MF;
+
+ // Shortcuts to some useful interface.
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ RegisterClassInfo RCI;
+
+ // analyses
+ SlotIndexes *Indexes;
+ MachineBlockFrequencyInfo *MBFI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineOptimizationRemarkEmitter *ORE;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+ AliasAnalysis *AA;
+
+ // state
+ std::unique_ptr<Spiller> SpillerInstance;
+ PQueue Queue;
+ std::unique_ptr<VirtRegAuxInfo> VRAI;
+ Optional<ExtraRegInfo> ExtraInfo;
+ std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor;
+
+ // Enum CutOffStage to keep track of whether the register allocation failed
+ // because of the cutoffs encountered in last chance recoloring.
+ // Note: This is used as a bitmask. New values should be the next power of 2.
+ enum CutOffStage {
+ // No cutoffs encountered
+ CO_None = 0,
+
+ // lcr-max-depth cutoff encountered
+ CO_Depth = 1,
+
+ // lcr-max-interf cutoff encountered
+ CO_Interf = 2
+ };
+
+ uint8_t CutOffInfo;
+
+#ifndef NDEBUG
+ static const char *const StageName[];
+#endif
+
+ /// EvictionTrack - Keeps track of past evictions in order to optimize region
+ /// split decision.
+ class EvictionTrack {
+
+ public:
+ using EvictorInfo =
+ std::pair<Register /* evictor */, MCRegister /* physreg */>;
+ using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;
+
+ private:
+ /// Each Vreg that has been evicted in the last stage of selectOrSplit will
+ /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
+ EvicteeInfo Evictees;
+
+ public:
+ /// Clear all eviction information.
+ void clear() { Evictees.clear(); }
+
+ /// Clear eviction information for the given evictee Vreg.
+ /// E.g. when Vreg gets a new allocation, the old eviction info is no
+ /// longer relevant.
+ /// \param Evictee The evictee Vreg for whom we want to clear collected
+ /// eviction info.
+ void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }
+
+ /// Track new eviction.
+ /// The Evictor vreg has evicted the Evictee vreg from Physreg.
+ /// \param PhysReg The physical register Evictee was evicted from.
+ /// \param Evictor The evictor Vreg that evicted Evictee.
+ /// \param Evictee The evictee Vreg.
+ void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
+ Evictees[Evictee].first = Evictor;
+ Evictees[Evictee].second = PhysReg;
+ }
+
+ /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
+ /// \param Evictee The evictee vreg.
+ /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
+ /// nobody has evicted Evictee from PhysReg.
+ EvictorInfo getEvictor(Register Evictee) {
+ if (Evictees.count(Evictee)) {
+ return Evictees[Evictee];
+ }
+
+ return EvictorInfo(0, 0);
+ }
+ };
+
+ // Keeps track of past evictions in order to optimize region split decision.
+ EvictionTrack LastEvicted;
+
+ // splitting state.
+ std::unique_ptr<SplitAnalysis> SA;
+ std::unique_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
+ MCRegister PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
+ InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, MCRegister Reg) {
+ PhysReg = Reg;
+ IntvIdx = 0;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+
+ // Set B[I] = C for every live bundle where B[I] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (unsigned I : LiveBundles.set_bits())
+ if (B[I] == NoCand) {
+ B[I] = C;
+ Count++;
+ }
+ return Count;
+ }
+ };
+
+ /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+ enum : unsigned { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
+ /// Callee-save register cost, calculated once per machine function.
+ BlockFrequency CSRCost;
+
+ /// Whether to consider the cost of local intervals created by a split
+ /// candidate when choosing the best split candidate.
+ bool EnableAdvancedRASplitCost;
+
+ /// Set of broken hints that may be reconciled later because of eviction.
+ SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+
+ /// The register cost values. This list will be recreated for each
+ /// MachineFunction.
+ ArrayRef<uint8_t> RegCosts;
+
+public:
+ RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
+
+ /// Return the pass name.
+ StringRef getPassName() const override { return "Greedy Register Allocator"; }
+
+ /// RAGreedy analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ Spiller &spiller() override { return *SpillerInstance; }
+ void enqueueImpl(LiveInterval *LI) override;
+ LiveInterval *dequeue() override;
+ MCRegister selectOrSplit(LiveInterval &,
+ SmallVectorImpl<Register> &) override;
+ void aboutToRemoveInterval(LiveInterval &) override;
+
+ /// Perform register allocation.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ static char ID;
+
+private:
+ MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, unsigned = 0);
+
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
+ void LRE_DidCloneVirtReg(Register, Register) override;
+ void enqueue(PQueue &CurQueue, LiveInterval *LI);
+ LiveInterval *dequeue(PQueue &CurQueue);
+
+ BlockFrequency calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &);
+ bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ bool growRegion(GlobalSplitCandidate &Cand);
+ bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
+ unsigned BBNumber,
+ const AllocationOrder &Order);
+ bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
+ GlobalSplitCandidate &Cand, unsigned BBNumber,
+ const AllocationOrder &Order);
+ BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
+ const AllocationOrder &Order,
+ bool *CanCauseEvictionChain);
+ bool calcCompactRegion(GlobalSplitCandidate &);
+ void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>);
+ void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
+ bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
+ MCRegister PhysReg, SlotIndex Start,
+ SlotIndex End, EvictionCost &MaxCost) const;
+ MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
+ const LiveInterval &VirtReg,
+ SlotIndex Start, SlotIndex End,
+ float *BestEvictWeight) const;
+ void evictInterference(LiveInterval &, MCRegister,
+ SmallVectorImpl<Register> &);
+ bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters);
+
+ MCRegister tryAssign(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, const SmallVirtRegSet &);
+ MCRegister tryEvict(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, uint8_t,
+ const SmallVirtRegSet &);
+ MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ /// Calculate cost of region splitting.
+ unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands, bool IgnoreCSR,
+ bool *CanCauseEvictionChain = nullptr);
+ /// Perform region splitting.
+ unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
+ /// Check other options before using a callee-saved register for the first
+ /// time.
+ MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ AllocationOrder &Order, MCRegister PhysReg,
+ uint8_t &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs);
+ void initializeCSRCost();
+ unsigned tryBlockSplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ unsigned tryLocalSplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ unsigned trySplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, const SmallVirtRegSet &);
+ unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, unsigned);
+ bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, unsigned);
+ void tryHintRecoloring(LiveInterval &);
+ void tryHintsRecoloring();
+
+ /// Model the information carried by one end of a copy.
+ struct HintInfo {
+ /// The frequency of the copy.
+ BlockFrequency Freq;
+ /// The virtual register or physical register.
+ Register Reg;
+ /// Its currently assigned register.
+ /// In case of a physical register Reg == PhysReg.
+ MCRegister PhysReg;
+
+ HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg)
+ : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
+ };
+ using HintsInfo = SmallVector<HintInfo, 4>;
+
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
+ void collectHintInfo(Register, HintsInfo &);
+
+ /// Greedy RA statistic to remark.
+ struct RAGreedyStats {
+ unsigned Reloads = 0;
+ unsigned FoldedReloads = 0;
+ unsigned ZeroCostFoldedReloads = 0;
+ unsigned Spills = 0;
+ unsigned FoldedSpills = 0;
+ unsigned Copies = 0;
+ float ReloadsCost = 0.0f;
+ float FoldedReloadsCost = 0.0f;
+ float SpillsCost = 0.0f;
+ float FoldedSpillsCost = 0.0f;
+ float CopiesCost = 0.0f;
+
+ bool isEmpty() {
+ return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
+ ZeroCostFoldedReloads || Copies);
+ }
+
+ void add(RAGreedyStats other) {
+ Reloads += other.Reloads;
+ FoldedReloads += other.FoldedReloads;
+ ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
+ Spills += other.Spills;
+ FoldedSpills += other.FoldedSpills;
+ Copies += other.Copies;
+ ReloadsCost += other.ReloadsCost;
+ FoldedReloadsCost += other.FoldedReloadsCost;
+ SpillsCost += other.SpillsCost;
+ FoldedSpillsCost += other.FoldedSpillsCost;
+ CopiesCost += other.CopiesCost;
+ }
+
+ void report(MachineOptimizationRemarkMissed &R);
+ };
+
+ /// Compute statistic for a basic block.
+ RAGreedyStats computeStats(MachineBasicBlock &MBB);
+
+ /// Compute and report statistic through a remark.
+ RAGreedyStats reportStats(MachineLoop *L);
+
+ /// Report the statistic for each loop.
+ void reportStats();
+};
+} // namespace llvm
+#endif // #ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
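
The ExtraRegInfo cascade numbers declared above exist to prevent eviction loops: a live range gets a cascade number the first time it evicts something, and a range may only evict interferences with a strictly older (smaller) cascade. Because every new eviction hands out an increasing number, eviction ping-pong between two ranges terminates. A small self-contained sketch of that bookkeeping (illustrative only; it mirrors getOrAssignNewCascade / getCascadeOrCurrentNext rather than reusing LLVM's IndexedMap-based implementation):

#include <cassert>
#include <unordered_map>

class CascadeTrack {
  std::unordered_map<unsigned, unsigned> Cascade; // virtual reg id -> cascade number
  unsigned NextCascade = 1;                       // 0 means "never evicted anything"

public:
  unsigned get(unsigned Reg) const {
    auto It = Cascade.find(Reg);
    return It == Cascade.end() ? 0u : It->second;
  }
  // Assign a fresh cascade number the first time Reg performs an eviction.
  unsigned getOrAssignNew(unsigned Reg) {
    unsigned &C = Cascade[Reg];
    if (!C)
      C = NextCascade++;
    return C;
  }
  // For cost queries: a range that never evicted acts as if it had the next number.
  unsigned getOrCurrentNext(unsigned Reg) const {
    unsigned C = get(Reg);
    return C ? C : NextCascade;
  }
};

int main() {
  CascadeTrack T;
  unsigned A = 5, B = 9;
  unsigned CA = T.getOrAssignNew(A); // A evicts B -> A gets cascade 1
  unsigned CB = T.getOrAssignNew(B); // B evicts A back -> B gets cascade 2
  // A may only evict interferences with a strictly older cascade, so it can no
  // longer evict B and the ping-pong stops.
  bool AMayEvictB = T.getOrCurrentNext(A) > CB;
  assert(CA < CB && !AMayEvictB);
  return 0;
}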
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index c0a07ec4c91d..424ad7419165 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -533,6 +533,22 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
Candidates.reset(*AI);
}
+ // If we have already scavenged some registers, remove them from the
+ // candidates. If we end up recursively calling eliminateFrameIndex, we don't
+ // want to be clobbering previously scavenged registers or their associated
+ // stack slots.
+ for (ScavengedInfo &SI : Scavenged) {
+ if (SI.Reg) {
+ if (isRegUsed(SI.Reg)) {
+ LLVM_DEBUG(
+ dbgs() << "Removing " << printReg(SI.Reg, TRI) <<
+ " from scavenging candidates since it was already scavenged\n");
+ for (MCRegAliasIterator AI(SI.Reg, TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+ }
+ }
+
// Try to find a register that's unused if there is one, as then we won't
// have to spill.
BitVector Available = getRegsAvailable(RC);
@@ -553,6 +569,12 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
if (!AllowSpill)
return 0;
+#ifndef NDEBUG
+ for (ScavengedInfo &SI : Scavenged) {
+ assert(SI.Reg != SReg && "scavenged a previously scavenged register");
+ }
+#endif
+
ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
Scavenged.Restore = &*std::prev(UseMI);
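
The RegisterScavenging change above removes registers that are still live as earlier scavenges from the candidate set, so a nested eliminateFrameIndex cannot clobber them. A tiny standalone sketch of that filtering step (purely illustrative; the fixed 32-register universe and plain integer register numbers are made-up stand-ins for the target's register file and BitVector):

#include <bitset>
#include <cassert>
#include <vector>

// Pick an unused register from Candidates, excluding anything that is still
// held by a previous scavenge (the analog of Candidates.reset(*AI) above).
static int pickCandidate(std::bitset<32> Candidates,
                         const std::vector<int> &AlreadyScavenged) {
  for (int Reg : AlreadyScavenged)
    Candidates.reset(Reg);
  for (int Reg = 0; Reg < 32; ++Reg)
    if (Candidates.test(Reg))
      return Reg;
  return -1; // nothing left; the real code would have to spill
}

int main() {
  std::bitset<32> Candidates;
  Candidates.set(3);
  Candidates.set(4);
  assert(pickCandidate(Candidates, {3}) == 4); // reg 3 was already scavenged
  return 0;
}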
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 6e05de888cc0..a61a2b2728fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -30,8 +30,7 @@ using namespace llvm;
ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
const char *ParentDebugType)
- : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
- DAG(SchedDAG) {
+ : DebugType(ParentDebugType), ItinData(II), DAG(SchedDAG) {
(void)DebugType;
// Determine the maximum depth of any itinerary. This determines the depth of
// the scoreboard. We always make the scoreboard at least 1 cycle deep to
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 067ad819e0d2..932f263d2558 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -593,7 +593,7 @@ namespace {
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue mergeTruncStores(StoreSDNode *N);
- SDValue ReduceLoadWidth(SDNode *N);
+ SDValue reduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
@@ -1070,7 +1070,7 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, N00, OpNode);
return SDValue();
}
- if (N0.hasOneUse()) {
+ if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
@@ -3058,9 +3058,8 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
-static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
- const TargetLowering &TLI, SDValue Carry0,
- SDValue Carry1, SDNode *N) {
+static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
+ SDValue Carry0, SDValue Carry1, SDNode *N) {
if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
return SDValue();
unsigned Opcode = Carry0.getOpcode();
@@ -3908,7 +3907,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
- SDValue Sh(nullptr, 0), Y(nullptr, 0);
+ SDValue Sh, Y;
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
@@ -4471,15 +4470,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
return FoldedVOp;
// fold (mulhs x, 0) -> 0
- // do not return N0/N1, because undef node may exist.
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
- ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ // do not return N1, because undef node may exist.
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
+
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
@@ -4531,18 +4530,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return FoldedVOp;
// fold (mulhu x, 0) -> 0
- // do not return N0/N1, because undef node may exist.
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
- ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ // do not return N1, because undef node may exist.
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
+
// fold (mulhu x, 1) -> 0
if (isOneConstant(N1))
return DAG.getConstant(0, DL, N0.getValueType());
+
// fold (mulhu x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -4892,6 +4892,42 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
: DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
}
+static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
+ // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
+ // be truncated versions of the setcc (N0/N1).
+ if ((N0 != N2 &&
+ (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
+ N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
+ return SDValue();
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return SDValue();
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C3 = N3C->getAPIntValue();
+ if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
+ C1 != C3.zextOrSelf(C1.getBitWidth()))
+ return SDValue();
+
+ unsigned BW = (C1 + 1).exactLogBase2();
+ EVT FPVT = N0.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
+ FPVT, NewVT))
+ return SDValue();
+
+ SDValue Sat =
+ DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
+}
+
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4934,6 +4970,9 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue S = PerformMinMaxFpToSatCombine(
N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
return S;
+ if (Opcode == ISD::UMIN)
+ if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
+ return S;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
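
The PerformUMinFpToSatCombine added above relies on the scalar identity that clamping an fptoui result with umin against 2^n - 1 is the same as an n-bit unsigned saturating conversion. A standalone check of that identity for the 8-bit case (illustrative; fptoui_sat8 paraphrases FP_TO_UINT_SAT semantics for non-negative inputs, and the test values are kept small so the plain float-to-unsigned conversion is well defined in C++):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Reference semantics of an 8-bit FP_TO_UINT_SAT for non-negative inputs:
// convert and clamp to [0, 255].
static uint8_t fptoui_sat8(float F) {
  if (F >= 255.0f)
    return 255;
  if (F <= 0.0f)
    return 0;
  return static_cast<uint8_t>(F);
}

int main() {
  const float Tests[] = {0.0f, 3.7f, 254.9f, 255.0f, 300.0f, 100000.0f};
  for (float F : Tests) {
    // umin(fptoui(F), 2^8 - 1): the pattern the combine matches.
    uint32_t Clamped = std::min<uint32_t>(static_cast<uint32_t>(F), 255u);
    assert(Clamped == fptoui_sat8(F));
  }
  return 0;
}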
@@ -5491,6 +5530,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Some constants may need fixing up later if they are too large.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (Mask->getValueType(0) != C->getValueType(0))
+ return false;
if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
NodesWithConsts.insert(N);
@@ -5524,9 +5565,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT VT = Op.getOpcode() == ISD::AssertZext ?
- cast<VTSDNode>(Op.getOperand(1))->getVT() :
- Op.getOperand(0).getValueType();
+ EVT VT = Op.getOpcode() == ISD::AssertZext
+ ? cast<VTSDNode>(Op.getOperand(1))->getVT()
+ : Op.getOperand(0).getValueType();
// We can accept extending nodes if the mask is wider or an equal
// width to the original type.
@@ -5534,6 +5575,15 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
continue;
break;
}
+ case ISD::ANY_EXTEND: {
+ unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT VT = Op.getOperand(0).getValueType();
+ if (ExtVT.bitsGE(VT))
+ break;
+ // Fallthrough to searching for nodes from the operands of the extend.
+ LLVM_FALLTHROUGH;
+ }
case ISD::OR:
case ISD::XOR:
case ISD::AND:
@@ -5593,12 +5643,14 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
// masking.
if (FixupNode) {
LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
- SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
- FixupNode->getValueType(0),
- SDValue(FixupNode, 0), MaskOp);
+ SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
+ FixupNode->getValueType(0));
+ SDValue And =
+ DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOpT);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
if (And.getOpcode() == ISD ::AND)
- DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
}
// Narrow any constants that need it.
@@ -5607,10 +5659,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
- std::swap(Op0, Op1);
+ std::swap(Op0, Op1);
- SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
- Op1, MaskOp);
+ SDValue MaskOpT =
+ DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
+ SDValue And =
+ DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
DAG.UpdateNodeOperands(LogicN, Op0, And);
}
@@ -5618,13 +5672,15 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
// Create narrow loads.
for (auto *Load : Loads) {
LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+ SDValue MaskOpT =
+ DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
- SDValue(Load, 0), MaskOp);
+ SDValue(Load, 0), MaskOpT);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
if (And.getOpcode() == ISD ::AND)
And = SDValue(
- DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
- SDValue NewLoad = ReduceLoadWidth(And.getNode());
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
+ SDValue NewLoad = reduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
CombineTo(Load, NewLoad, NewLoad.getValue(1));
@@ -5799,18 +5855,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- // do not return N0, because undef node may exist in N0
- return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
- SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
- if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
- return N1;
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
return N0;
@@ -5862,7 +5912,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
- if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
return Combined;
// fold (and (or x, C), D) -> D if (C & D) == D
@@ -6024,7 +6074,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
(N0.getOpcode() == ISD::ANY_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::LOAD))) {
- if (SDValue Res = ReduceLoadWidth(N)) {
+ if (SDValue Res = reduceLoadWidth(N)) {
LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
AddToWorklist(N);
@@ -6659,7 +6709,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
- if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
@@ -8156,7 +8206,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
return Combined;
return SDValue();
@@ -8948,6 +8998,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;
+ // Attempt to convert a sra of a load into a narrower sign-extending load.
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
+ return NarrowLoad;
+
return SDValue();
}
@@ -9140,7 +9194,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSRL;
// Attempt to convert a srl of a load into a narrower zero-extending load.
- if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
return NarrowLoad;
// Here is a common situation. We want to optimize:
@@ -9358,6 +9412,17 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
return N0->getOperand(0);
+
+ // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
+ // isn't supported, it will be expanded to bswap followed by a manual reversal
+ // of bits in each byte. By placing bswaps before bitreverse, we can remove
+ // the two bswaps if the bitreverse gets expanded.
+ if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
+ }
+
return SDValue();
}
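
The bswap(bitreverse(x)) -> bitreverse(bswap(x)) canonicalization above is valid because byte reversal and whole-word bit reversal commute: both orders reduce to reversing the bits within each byte. A standalone check of the identity on i32 (illustrative helpers; bitreverse32 and bswap32 are hand-written stand-ins for ISD::BITREVERSE and ISD::BSWAP):

#include <cassert>
#include <cstdint>

// Reverse all 32 bits (what ISD::BITREVERSE does on i32).
static uint32_t bitreverse32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I) {
    R = (R << 1) | (V & 1u);
    V >>= 1;
  }
  return R;
}

// Reverse the byte order (what ISD::BSWAP does on i32).
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0x0000FF00u) | ((V << 8) & 0x00FF0000u) |
         (V << 24);
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t V : Tests)
    assert(bswap32(bitreverse32(V)) == bitreverse32(bswap32(V)));
  return 0;
}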
@@ -10288,6 +10353,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
return S;
+ if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
@@ -11357,7 +11424,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
- if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -11621,7 +11688,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
- if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -11864,7 +11931,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -12095,13 +12162,10 @@ SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
return SDValue();
}
-/// If the result of a wider load is shifted to right of N bits and then
-/// truncated to a narrower type and where N is a multiple of number of bits of
-/// the narrower type, transform it to a narrower load from address + N / num of
-/// bits of new type. Also narrow the load if the result is masked with an AND
-/// to effectively produce a smaller type. If the result is to be extended, also
-/// fold the extension to form a extending load.
-SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+/// If the result of a load is shifted/masked/truncated to an effectively
+/// narrower type, try to transform the load to a narrower type and/or
+/// use an extending load.
+SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
@@ -12113,32 +12177,48 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (VT.isVector())
return SDValue();
+ // The ShAmt variable is used to indicate that we've consumed a right
+ // shift. I.e. we want to narrow the width of the load by not loading the
+ // ShAmt least significant bits.
unsigned ShAmt = 0;
+ // A special case is when the least significant bits from the load are masked
+ // away, but using an AND rather than a right shift. HasShiftedOffset is used
+ // to indicate that the narrowed load should be left-shifted ShAmt bits to get
+ // the result.
bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- } else if (Opc == ISD::SRL) {
- // Another special-case: SRL is basically zero-extending a narrower value,
- // or it maybe shifting a higher subword, half or byte into the lowest
- // bits.
- ExtType = ISD::ZEXTLOAD;
- N0 = SDValue(N, 0);
+ } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
+ // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
+ // value, or it may be shifting a higher subword, half or byte into the
+ // lowest bits.
- auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
- auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01 || !LN0)
+ // Only handle shift with constant shift amount, and the shiftee must be a
+ // load.
+ auto *LN = dyn_cast<LoadSDNode>(N0);
+ auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!N1C || !LN)
+ return SDValue();
+ // If the shift amount is larger than the memory type then we're not
+ // accessing any of the loaded bytes.
+ ShAmt = N1C->getZExtValue();
+ uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
+ if (MemoryWidth <= ShAmt)
+ return SDValue();
+ // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
+ ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
+ // If original load is a SEXTLOAD then we can't simply replace it by a
+ // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
+ // followed by a ZEXT, but that is not handled at the moment). Similarly if
+ // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
+ if ((LN->getExtensionType() == ISD::SEXTLOAD ||
+ LN->getExtensionType() == ISD::ZEXTLOAD) &&
+ LN->getExtensionType() != ExtType)
return SDValue();
-
- uint64_t ShiftAmt = N01->getZExtValue();
- uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
- if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
- ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
- else
- ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getScalarSizeInBits() - ShiftAmt);
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -12161,55 +12241,80 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- SDValue SRL = N0;
- if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
- ShAmt = ConstShift->getZExtValue();
- unsigned EVTBits = ExtVT.getScalarSizeInBits();
- // Is the shift amount a multiple of size of VT?
- if ((ShAmt & (EVTBits-1)) == 0) {
- N0 = N0.getOperand(0);
- // Is the load width a multiple of size of VT?
- if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
- return SDValue();
- }
+ // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
+ // a right shift. Here we redo some of those checks, to possibly adjust the
+ // ExtVT even further based on "a masking AND". We could also end up here for
+ // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
+ // need to be done here as well.
+ if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
+ SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
+ // Bail out when the SRL has more than one use. This is done for historical
+ // (undocumented) reasons. Maybe the intent was to guard the AND-masking check
+ // below? And maybe it could be non-profitable to do the transform in case the
+ // SRL has multiple uses and we get here with Opc!=ISD::SRL?
+ // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
+ if (!SRL.hasOneUse())
+ return SDValue();
+
+ // Only handle shift with constant shift amount, and the shiftee must be a
+ // load.
+ auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
+ auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
+ if (!SRL1C || !LN)
+ return SDValue();
- // At this point, we must have a load or else we can't do the transform.
- auto *LN0 = dyn_cast<LoadSDNode>(N0);
- if (!LN0) return SDValue();
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ ShAmt = SRL1C->getZExtValue();
+ uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
+ if (ShAmt >= MemoryWidth)
+ return SDValue();
- // Because a SRL must be assumed to *need* to zero-extend the high bits
- // (as opposed to anyext the high bits), we can't combine the zextload
- // lowering of SRL and an sextload.
- if (LN0->getExtensionType() == ISD::SEXTLOAD)
- return SDValue();
+ // Because a SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (LN->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
- // If the shift amount is larger than the input type then we're not
- // accessing any of the loaded bytes. If the load was a zextload/extload
- // then the result of the shift+trunc is zero/undef (handled elsewhere).
- if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
+ // Avoid reading outside the memory accessed by the original load (could
+ // happened if we only adjust the load base pointer by ShAmt). Instead we
+ // try to narrow the load even further. The typical scenario here is:
+ // (i64 (truncate (i96 (srl (load x), 64)))) ->
+ // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
+ if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
+ // Don't replace sextload by zextload.
+ if (ExtType == ISD::SEXTLOAD)
return SDValue();
-
- // If the SRL is only used by a masking AND, we may be able to adjust
- // the ExtVT to make the AND redundant.
- SDNode *Mask = *(SRL->use_begin());
- if (Mask->getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Mask->getOperand(1))) {
- const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
- if (ShiftMask.isMask()) {
- EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
- ShiftMask.countTrailingOnes());
- // If the mask is smaller, recompute the type.
- if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
- TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
- ExtVT = MaskedVT;
- }
+ // Narrow the load.
+ ExtType = ISD::ZEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
+ }
+
+ // If the SRL is only used by a masking AND, we may be able to adjust
+ // the ExtVT to make the AND redundant.
+ SDNode *Mask = *(SRL->use_begin());
+ if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Mask->getOperand(1))) {
+ const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
+ if (ShiftMask.isMask()) {
+ EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+ ShiftMask.countTrailingOnes());
+ // If the mask is smaller, recompute the type.
+ if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
+ TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
+ ExtVT = MaskedVT;
}
}
+
+ N0 = SRL.getOperand(0);
}
- // If the load is shifted left (and the result isn't shifted back right),
- // we can fold the truncate through the shift.
+ // If the load is shifted left (and the result isn't shifted back right), we
+ // can fold a truncate through the shift. The typical scenario is that N
+ // points at a TRUNCATE here so the attempted fold is:
+ // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
+ // ShLeftAmt will indicate how much a narrowed load should be shifted left.
unsigned ShLeftAmt = 0;
if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
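
The reworked reduceLoadWidth above replaces a wide load followed by a right shift and truncate with a narrower load at a byte offset (PtrOff = ShAmt / 8, mirrored for big-endian via AdjustBigEndianShift). A minimal host-side illustration of why the offset works (illustrative only; the assert assumes a little-endian host, which is exactly the case the code handles without the big-endian adjustment):

#include <cassert>
#include <cstdint>
#include <cstring>

// Little-endian only: narrowing "(truncate (srl (load64 p), 32))" to a 32-bit
// load at byte offset 32 / 8 = 4.
int main() {
  unsigned char Buf[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
  uint64_t Wide;
  std::memcpy(&Wide, Buf, sizeof(Wide));         // the original wide load
  uint32_t ShiftedTrunc = static_cast<uint32_t>(Wide >> 32);
  uint32_t Narrow;
  std::memcpy(&Narrow, Buf + 4, sizeof(Narrow)); // the narrowed load at +4
  assert(ShiftedTrunc == Narrow);                // holds on little-endian hosts
  return 0;
}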
@@ -12237,12 +12342,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return LVTStoreBits - EVTStoreBits - ShAmt;
};
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (DAG.getDataLayout().isBigEndian())
- ShAmt = AdjustBigEndianShift(ShAmt);
+ // We need to adjust the pointer to the load by ShAmt bits in order to load
+ // the correct bytes.
+ unsigned PtrAdjustmentInBits =
+ DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
- uint64_t PtrOff = ShAmt / 8;
+ uint64_t PtrOff = PtrAdjustmentInBits / 8;
Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
@@ -12285,11 +12390,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
}
if (HasShiftedOffset) {
- // Recalculate the shift amount after it has been altered to calculate
- // the offset.
- if (DAG.getDataLayout().isBigEndian())
- ShAmt = AdjustBigEndianShift(ShAmt);
-
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into the lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in the
@@ -12320,7 +12420,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))
+ if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -12336,7 +12436,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&
+ if ((N00Bits <= ExtVTBits ||
+ DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -12355,7 +12456,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
- DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&
+ DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
@@ -12381,7 +12482,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
- if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -12668,7 +12769,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
- if (SDValue Reduced = ReduceLoadWidth(N))
+ if (SDValue Reduced = reduceLoadWidth(N))
return Reduced;
// Handle the case where the load remains an extending load even
@@ -17491,6 +17592,10 @@ void DAGCombiner::getStoreMergeCandidates(
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
TryToAddCandidate(I2);
}
+ // Check stores that depend on the root (e.g. Store 3 in the chart above).
+ if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
+ TryToAddCandidate(I);
+ }
}
} else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
@@ -18351,6 +18456,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
+    // Convert a truncating store of an extension into a standard store.
+ if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
+ Value.getOpcode() == ISD::SIGN_EXTEND ||
+ Value.getOpcode() == ISD::ANY_EXTEND) &&
+ Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getMemOperand());
+
APInt TruncDemandedBits =
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
@@ -23299,6 +23413,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
return S;
+ if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
return SDValue();
}
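
The reduceLoadWidth changes above narrow (truncate (srl (load x), ShAmt)) into a
smaller load at a byte offset, switching to a zero-extending load when the
requested type reaches past MemoryWidth - ShAmt. Below is a minimal standalone
sketch of the bit-level equivalence being relied on, assuming a little-endian
host and a compiler that provides unsigned __int128; the buffer and variable
names are illustrative, not LLVM APIs.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 12 bytes of memory backing an i96 value.
  uint8_t Mem[12] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66,
                     0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC};

  // Wide form: (i64 (truncate (i96 (srl (load x), 64)))).
  unsigned __int128 Wide = 0;
  std::memcpy(&Wide, Mem, 12);                  // i96 load, zero-padded to 128 bits
  uint64_t WideResult = (uint64_t)(Wide >> 64); // srl by 64, truncate to i64

  // Narrow form: (i64 (zextload i32 from x + 64/8)).
  uint32_t Narrow;
  std::memcpy(&Narrow, Mem + 8, 4);
  uint64_t NarrowResult = Narrow;               // zext i32 -> i64

  assert(WideResult == NarrowResult);
  (void)WideResult;
  (void)NarrowResult;
  return 0;
}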
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4d1449bc2751..bfde35935c7b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1775,12 +1775,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return false;
case Instruction::Call:
- // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // On AIX, normal call lowering uses the DAG-ISEL path currently so that the
// callee of the direct function call instruction will be mapped to the
// symbol for the function's entry point, which is distinct from the
// function descriptor symbol. The latter is the symbol whose XCOFF symbol
// name is the C-linkage name of the source level function.
- if (TM.getTargetTriple().isOSAIX())
+ // But fast isel still has the ability to do selection for intrinsics.
+ if (TM.getTargetTriple().isOSAIX() && !isa<IntrinsicInst>(I))
return false;
return selectCall(I);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5dfb65ef131a..54481b94fdd8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3593,9 +3593,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
- if (Tmp3.getNode())
- Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
- Tmp1, Tmp2, Tmp3, Node->getFlags());
+ if (Tmp3.getNode()) {
+ if (IsStrict) {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
+ {Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags());
+ Chain = Tmp1.getValue(1);
+ } else {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1,
+ Tmp2, Tmp3, Node->getFlags());
+ }
+ }
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
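
The LegalizeDAG hunk above rebuilds a SETCC (including the strict FP variants)
after the condition has been legalized, either by swapping the operands with
the swapped predicate or by inverting the predicate and wrapping the result in
a NOT. A small standalone check of those two scalar identities follows, with
plain doubles standing in for the DAG values and NaN-free inputs assumed for
the inversion case.

#include <cassert>

int main() {
  double A = 1.5, B = 2.5;
  // setcc(A, B, lt) == setcc(B, A, gt): operand swap with swapped predicate.
  assert((A < B) == (B > A));
  // setcc(A, B, lt) == !setcc(A, B, ge): inverted predicate wrapped in a NOT.
  assert((A < B) == !(A >= B));
  return 0;
}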
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 27f9cede1922..6bf38d7296a8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1193,7 +1193,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
llvm_unreachable("Do not know how to expand the result of this operator!");
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 518e525e13d0..8c7b90b6cd33 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -75,30 +75,28 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
break;
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
- case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
- case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::VP_SELECT:
+ Res = PromoteIntRes_Select(N);
+ break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX:
- Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
- break;
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
case ISD::SHL:
- Res = PromoteIntRes_SHL(N, /*IsVP*/ false);
- break;
+ case ISD::VP_SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
case ISD::SRA:
- Res = PromoteIntRes_SRA(N, /*IsVP*/ false);
- break;
+ case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL:
- Res = PromoteIntRes_SRL(N, /*IsVP*/ false);
- break;
+ case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -154,18 +152,22 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
- Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false);
- break;
+ case ISD::VP_AND:
+ case ISD::VP_OR:
+ case ISD::VP_XOR:
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SDIV:
case ISD::SREM:
- Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
- break;
+ case ISD::VP_SDIV:
+ case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UDIV:
case ISD::UREM:
- Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false);
- break;
+ case ISD::VP_UDIV:
+ case ISD::VP_UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -260,32 +262,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
-
- case ISD::VP_AND:
- case ISD::VP_OR:
- case ISD::VP_XOR:
- case ISD::VP_ADD:
- case ISD::VP_SUB:
- case ISD::VP_MUL:
- Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true);
- break;
- case ISD::VP_SDIV:
- case ISD::VP_SREM:
- Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true);
- break;
- case ISD::VP_UDIV:
- case ISD::VP_UREM:
- Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true);
- break;
- case ISD::VP_SHL:
- Res = PromoteIntRes_SHL(N, /*IsVP*/ true);
- break;
- case ISD::VP_ASHR:
- Res = PromoteIntRes_SRA(N, /*IsVP*/ true);
- break;
- case ISD::VP_LSHR:
- Res = PromoteIntRes_SRL(N, /*IsVP*/ true);
- break;
}
// If the result is null then the sub-method took care of registering it.
@@ -1127,20 +1103,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
- SDValue LHS = GetPromotedInteger(N->getOperand(1));
- SDValue RHS = GetPromotedInteger(N->getOperand(2));
- return DAG.getSelect(SDLoc(N),
- LHS.getValueType(), N->getOperand(0), LHS, RHS);
-}
-
-SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) {
SDValue Mask = N->getOperand(0);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
- return DAG.getNode(ISD::VSELECT, SDLoc(N),
- LHS.getValueType(), Mask, LHS, RHS);
+
+ unsigned Opcode = N->getOpcode();
+ return Opcode == ISD::VP_SELECT
+ ? DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS,
+ N->getOperand(3))
+ : DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS,
+ RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
@@ -1193,12 +1167,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- if (!IsVP)
+ if (N->getOpcode() != ISD::VP_SHL)
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
N->getOperand(2), N->getOperand(3));
@@ -1210,34 +1184,40 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
Op.getValueType(), Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
- if (!IsVP)
+ if (N->getNumOperands() == 2)
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- if (!IsVP)
+ if (N->getNumOperands() == 2)
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- if (!IsVP)
+ if (N->getNumOperands() == 2)
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
N->getOperand(2), N->getOperand(3));
}
@@ -1251,25 +1231,25 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- if (!IsVP)
+ if (N->getOpcode() != ISD::VP_ASHR)
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- if (!IsVP)
+ if (N->getOpcode() != ISD::VP_LSHR)
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
N->getOperand(2), N->getOperand(3));
@@ -1653,7 +1633,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
- case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -1703,50 +1684,64 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
/// shared among BR_CC, SELECT_CC, and SETCC handlers.
-void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
ISD::CondCode CCCode) {
// We have to insert explicit sign or zero extends. Note that we could
// insert sign extends for ALL conditions. For those operations where either
- // zero or sign extension would be valid, use SExtOrZExtPromotedInteger
- // which will choose the cheapest for the target.
- switch (CCCode) {
- default: llvm_unreachable("Unknown integer comparison!");
- case ISD::SETEQ:
- case ISD::SETNE: {
- SDValue OpL = GetPromotedInteger(NewLHS);
- SDValue OpR = GetPromotedInteger(NewRHS);
-
- // We would prefer to promote the comparison operand with sign extension.
- // If the width of OpL/OpR excluding the duplicated sign bits is no greater
- // than the width of NewLHS/NewRH, we can avoid inserting real truncate
- // instruction, which is redundant eventually.
- unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL);
- unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);
- if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
- OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
- NewLHS = OpL;
- NewRHS = OpR;
- } else {
- NewLHS = SExtOrZExtPromotedInteger(NewLHS);
- NewRHS = SExtOrZExtPromotedInteger(NewRHS);
+ // zero or sign extension would be valid, we ask the target which extension
+ // it would prefer.
+
+ // Signed comparisons always require sign extension.
+ if (ISD::isSignedIntSetCC(CCCode)) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ return;
+ }
+
+ assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) &&
+ "Unknown integer comparison!");
+
+ SDValue OpL = GetPromotedInteger(LHS);
+ SDValue OpR = GetPromotedInteger(RHS);
+
+ if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) {
+ // The target would prefer to promote the comparison operand with sign
+ // extension. Honor that unless the promoted values are already zero
+ // extended.
+ unsigned OpLEffectiveBits =
+ DAG.computeKnownBits(OpL).countMaxActiveBits();
+ unsigned OpREffectiveBits =
+ DAG.computeKnownBits(OpR).countMaxActiveBits();
+ if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= RHS.getScalarValueSizeInBits()) {
+ LHS = OpL;
+ RHS = OpR;
+ return;
}
- break;
+
+    // The promoted values aren't zero extended, so use a sext_inreg.
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ return;
}
- case ISD::SETUGE:
- case ISD::SETUGT:
- case ISD::SETULE:
- case ISD::SETULT:
- NewLHS = SExtOrZExtPromotedInteger(NewLHS);
- NewRHS = SExtOrZExtPromotedInteger(NewRHS);
- break;
- case ISD::SETGE:
- case ISD::SETGT:
- case ISD::SETLT:
- case ISD::SETLE:
- NewLHS = SExtPromotedInteger(NewLHS);
- NewRHS = SExtPromotedInteger(NewRHS);
- break;
+
+ // Prefer to promote the comparison operand with zero extension.
+
+ // If the width of OpL/OpR excluding the duplicated sign bits is no greater
+  // than the width of LHS/RHS, we can avoid inserting a zext_inreg operation
+ // that we might not be able to remove.
+ unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(OpL);
+ unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(OpR);
+ if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= RHS.getScalarValueSizeInBits()) {
+ LHS = OpL;
+ RHS = OpR;
+ return;
}
+
+ // Otherwise, use zext_inreg.
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
@@ -2099,8 +2094,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
- // FIXME: Support for promotion of STRICT_FPOWI is not implemented yet.
- assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet.");
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
// The integer operand is the last operand in FPOWI (so the result and
// floating point operand is already type legalized).
@@ -2118,17 +2113,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
return DAG.getUNDEF(N->getValueType(0));
}
+ unsigned OpOffset = IsStrict ? 1 : 0;
// The exponent should fit in a sizeof(int) type for the libcall to be valid.
assert(DAG.getLibInfo().getIntSize() ==
- N->getOperand(1).getValueType().getSizeInBits() &&
+ N->getOperand(1 + OpOffset).getValueType().getSizeInBits() &&
"POWI exponent should match with sizeof(int) when doing the libcall.");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- std::pair<SDValue, SDValue> Tmp =
- TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops,
- CallOptions, SDLoc(N), SDValue());
+ SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)};
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain);
ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
return SDValue();
}
@@ -2255,7 +2252,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
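
The reworked PromoteSetCCOperands above compares the promoted operands directly
when computeKnownBits/ComputeMaxSignificantBits show they already fit in the
original width, and only falls back to an explicit sext_inreg or zext_inreg
otherwise. A standalone sketch of the invariant this relies on, using 8-bit
values promoted to 32 bits; the variable names are illustrative.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t A = 0; A < 256; ++A) {
    for (uint32_t B = 0; B < 256; ++B) {
      // Promoted values whose upper 24 bits are already zero, i.e.
      // countMaxActiveBits() <= 8 in the hunk above.
      uint32_t PromA = A, PromB = B;
      // Explicitly re-extended values (what ZExtPromotedInteger would build).
      uint32_t ZExtA = A & 0xFF, ZExtB = B & 0xFF;
      assert((PromA == PromB) == (ZExtA == ZExtB)); // equality compare
      assert((PromA < PromB) == (ZExtA < ZExtB));   // unsigned compare
    }
  }
  return 0;
}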
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index da282ecad282..4d8daa82d8c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -334,18 +334,17 @@ private:
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_SELECT(SDNode *N);
- SDValue PromoteIntRes_VSELECT(SDNode *N);
+ SDValue PromoteIntRes_Select(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
- SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP);
- SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP);
- SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP);
- SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
- SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP);
- SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
@@ -819,6 +818,12 @@ private:
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+  /// Split mask operand of a VP intrinsic.
+ std::pair<SDValue, SDValue> SplitMask(SDValue Mask);
+
+  /// Split mask operand of a VP intrinsic at a given location.
+ std::pair<SDValue, SDValue> SplitMask(SDValue Mask, const SDLoc &DL);
+
// Helper function for incrementing the pointer when splitting
// memory operations
void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
@@ -826,7 +831,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
- void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -847,8 +852,10 @@ private:
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
+ bool SplitSETCC = false);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -864,6 +871,7 @@ private:
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N);
+ SDValue SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
@@ -873,9 +881,10 @@ private:
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
- SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
- SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
+ SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
@@ -900,6 +909,23 @@ private:
}
void SetWidenedVector(SDValue Op, SDValue Result);
+ /// Given a mask Mask, returns the larger vector into which Mask was widened.
+ SDValue GetWidenedMask(SDValue Mask, ElementCount EC) {
+ // For VP operations, we must also widen the mask. Note that the mask type
+    // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle
+ // the case where the mask needs widening to an identically-sized type as
+ // the vector inputs.
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() == EC &&
+ "Unable to widen binary VP op");
+ return Mask;
+ }
+
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
@@ -911,10 +937,12 @@ private:
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
+ SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
SDValue WidenVecRes_ScalarOp(SDNode* N);
- SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_Select(SDNode *N);
SDValue WidenVSELECTMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
@@ -923,7 +951,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
- SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);
+ SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
@@ -945,9 +973,11 @@ private:
SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
@@ -957,6 +987,7 @@ private:
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
+ SDValue WidenVecOp_VP_REDUCE(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
@@ -1023,7 +1054,7 @@ private:
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
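
Among the new declarations above is SplitVecOp_VP_REDUCE, whose implementation
appears later in LegalizeVectorTypes.cpp: the low half is reduced with the
original start value and its result is fed in as the start value for the high
half. A standalone sketch of that strategy, with plain integers and addition
standing in for the DAG nodes.

#include <cassert>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> Vec = {1, 2, 3, 4, 5, 6, 7, 8};
  int Start = 100;

  // Unsplit reduction over the whole vector.
  int Whole = std::accumulate(Vec.begin(), Vec.end(), Start);

  // Split reduction: reduce the low half, then use that result as the
  // start value for the high half (mirrors ResLo feeding the second node).
  auto Mid = Vec.begin() + Vec.size() / 2;
  int ResLo = std::accumulate(Vec.begin(), Mid, Start);
  int Split = std::accumulate(Mid, Vec.end(), ResLo);

  assert(Whole == Split);
  return 0;
}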
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 3d3c9a2ad837..c6885677d644 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -506,9 +506,10 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
+ unsigned Opcode = N->getOpcode();
GetSplitOp(N->getOperand(1), LL, LH);
GetSplitOp(N->getOperand(2), RL, RH);
@@ -539,8 +540,18 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
- Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
- Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+ if (Opcode != ISD::VP_SELECT && Opcode != ISD::VP_MERGE) {
+ Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH);
+ return;
+ }
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL, EVLLo);
+ Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi);
}
void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
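
SplitRes_Select above splits the explicit vector length with DAG.SplitEVL.
Assuming SplitEVL keeps the umin/usubsat formula spelled out in the inline code
it replaces further below (EVLLo = umin(EVL, HalfNumElts), EVLHi = usubsat(EVL,
HalfNumElts)), the standalone sketch below shows why the two halves together
cover exactly the first EVL lanes.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t NumElts = 8, Half = NumElts / 2;
  for (uint32_t EVL = 0; EVL <= NumElts; ++EVL) {
    uint32_t EVLLo = std::min(EVL, Half);         // umin(EVL, HalfNumElts)
    uint32_t EVLHi = EVL > Half ? EVL - Half : 0; // usubsat(EVL, HalfNumElts)
    // The two halves process exactly the first EVL lanes between them.
    assert(EVLLo + EVLHi == EVL);
    assert(EVLLo <= Half && EVLHi <= Half);
  }
  return 0;
}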
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1493f36fcd3e..abf6a3ac6916 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -133,6 +133,8 @@ class VectorLegalizer {
/// Implement vselect in terms of XOR, AND, OR when blend is not
/// supported by the target.
SDValue ExpandVSELECT(SDNode *Node);
+ SDValue ExpandVP_SELECT(SDNode *Node);
+ SDValue ExpandVP_MERGE(SDNode *Node);
SDValue ExpandSELECT(SDNode *Node);
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
@@ -457,6 +459,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
}
+
+#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
+ case ISD::VPID: { \
+ EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
+ : Node->getOperand(LEGALPOS).getValueType(); \
+ Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
+ } break;
+#include "llvm/IR/VPIntrinsics.def"
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -718,6 +728,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VSELECT:
Results.push_back(ExpandVSELECT(Node));
return;
+ case ISD::VP_SELECT:
+ Results.push_back(ExpandVP_SELECT(Node));
+ return;
case ISD::SELECT:
Results.push_back(ExpandSELECT(Node));
return;
@@ -865,6 +878,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::UREM:
ExpandREM(Node, Results);
return;
+ case ISD::VP_MERGE:
+ Results.push_back(ExpandVP_MERGE(Node));
+ return;
}
Results.push_back(DAG.UnrollVectorOp(Node));
@@ -1195,6 +1211,79 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
+SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
+ // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
+ // do not support it natively.
+ SDLoc DL(Node);
+
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ EVT VT = Mask.getValueType();
+
+ // If we can't even use the basic vector operations of
+ // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
+ if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Node);
+
+ // This operation also isn't safe when the operands aren't also booleans.
+ if (Op1.getValueType().getVectorElementType() != MVT::i1)
+ return DAG.UnrollVectorOp(Node);
+
+ SDValue Ones = DAG.getAllOnesConstant(DL, VT);
+ SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL);
+
+ Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL);
+ Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
+}
+
+SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
+ // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
+ // indices less than the EVL/pivot are true. Combine that with the original
+ // mask for a full-length mask. Use a full-length VSELECT to select between
+ // the true and false values.
+ SDLoc DL(Node);
+
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ EVT MaskVT = Mask.getValueType();
+ bool IsFixedLen = MaskVT.isFixedLengthVector();
+
+ EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
+ MaskVT.getVectorElementCount());
+
+ // If we can't construct the EVL mask efficiently, it's better to unroll.
+ if ((IsFixedLen &&
+ !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
+ (!IsFixedLen &&
+ (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
+ return DAG.UnrollVectorOp(Node);
+
+ // If using a SETCC would result in a different type than the mask type,
+ // unroll.
+ if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ EVLVecVT) != MaskVT)
+ return DAG.UnrollVectorOp(Node);
+
+ SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
+ SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL)
+ : DAG.getSplatVector(EVLVecVT, DL, EVL);
+ SDValue EVLMask =
+ DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
+
+ SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
+ return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
+}
+
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
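
ExpandVP_MERGE above lowers VP_MERGE to a full-length VSELECT whose mask is the
original mask ANDed with a "step_vector < splat(EVL)" comparison. A standalone
scalar model of that lowering follows; the array contents and names are
illustrative, and scalar loops stand in for STEP_VECTOR/SPLAT_VECTOR/SETCC/
VSELECT.

#include <array>
#include <cassert>
#include <cstddef>

int main() {
  constexpr std::size_t N = 8;
  std::array<int, N> Op1{10, 11, 12, 13, 14, 15, 16, 17};
  std::array<int, N> Op2{20, 21, 22, 23, 24, 25, 26, 27};
  std::array<bool, N> Mask{true, false, true, true, false, true, false, true};
  std::size_t EVL = 5;

  std::array<int, N> Result{};
  for (std::size_t I = 0; I < N; ++I) {
    bool EVLMask = I < EVL;                 // step_vector < splat(EVL), SETULT
    bool FullMask = Mask[I] && EVLMask;     // AND with the original mask
    Result[I] = FullMask ? Op1[I] : Op2[I]; // full-length VSELECT
  }

  // vp.merge semantics: lane I takes Op1[I] when Mask[I] and I < EVL,
  // otherwise Op2[I].
  for (std::size_t I = 0; I < N; ++I)
    assert(Result[I] == ((Mask[I] && I < EVL) ? Op1[I] : Op2[I]));
  return 0;
}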
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7ec2638b1e71..0bd44ce4c872 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -914,7 +914,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT:
+ case ISD::VP_MERGE:
+ case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
@@ -936,11 +938,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
+ case ISD::VP_LOAD:
+ SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
+ break;
case ISD::MLOAD:
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
break;
case ISD::MGATHER:
- SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
+ case ISD::VP_GATHER:
+ SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
break;
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
@@ -1008,31 +1014,31 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_ExtendOp(N, Lo, Hi);
break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
+ case ISD::ADD: case ISD::VP_ADD:
+ case ISD::SUB: case ISD::VP_SUB:
+ case ISD::MUL: case ISD::VP_MUL:
case ISD::MULHS:
case ISD::MULHU:
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
+ case ISD::FADD: case ISD::VP_FADD:
+ case ISD::FSUB: case ISD::VP_FSUB:
+ case ISD::FMUL: case ISD::VP_FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::FDIV:
+ case ISD::SDIV: case ISD::VP_SDIV:
+ case ISD::UDIV: case ISD::VP_UDIV:
+ case ISD::FDIV: case ISD::VP_FDIV:
case ISD::FPOW:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::UREM:
- case ISD::SREM:
- case ISD::FREM:
+ case ISD::AND: case ISD::VP_AND:
+ case ISD::OR: case ISD::VP_OR:
+ case ISD::XOR: case ISD::VP_XOR:
+ case ISD::SHL: case ISD::VP_SHL:
+ case ISD::SRA: case ISD::VP_ASHR:
+ case ISD::SRL: case ISD::VP_LSHR:
+ case ISD::UREM: case ISD::VP_UREM:
+ case ISD::SREM: case ISD::VP_SREM:
+ case ISD::FREM: case ISD::VP_FREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1045,7 +1051,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);
+ SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
case ISD::FSHL:
@@ -1082,26 +1088,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
- case ISD::VP_ADD:
- case ISD::VP_AND:
- case ISD::VP_MUL:
- case ISD::VP_OR:
- case ISD::VP_SUB:
- case ISD::VP_XOR:
- case ISD::VP_SHL:
- case ISD::VP_LSHR:
- case ISD::VP_ASHR:
- case ISD::VP_SDIV:
- case ISD::VP_UDIV:
- case ISD::VP_SREM:
- case ISD::VP_UREM:
- case ISD::VP_FADD:
- case ISD::VP_FSUB:
- case ISD::VP_FMUL:
- case ISD::VP_FDIV:
- case ISD::VP_FREM:
- SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);
- break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1133,8 +1119,22 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
}
}
-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
- bool IsVP) {
+std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
+ return SplitMask(Mask, SDLoc(Mask));
+}
+
+std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
+ const SDLoc &DL) {
+ SDValue MaskLo, MaskHi;
+ EVT MaskVT = Mask.getValueType();
+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ return std::make_pair(MaskLo, MaskHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1143,36 +1143,21 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
- if (!IsVP) {
+ if (N->getNumOperands() == 2) {
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
return;
}
- // Split the mask.
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
SDValue MaskLo, MaskHi;
- SDValue Mask = N->getOperand(2);
- EVT MaskVT = Mask.getValueType();
- if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
-
- // Split the vector length parameter.
- // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).
- SDValue EVL = N->getOperand(3);
- EVT VecVT = N->getValueType(0);
- EVT EVLVT = EVL.getValueType();
- assert(VecVT.getVectorElementCount().isKnownEven() &&
- "Expecting the mask to be an evenly-sized vector");
- unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
- SDValue HalfNumElts =
- VecVT.isFixedLengthVector()
- ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
- : DAG.getVScale(dl, EVLVT,
- APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
- SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
- SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
{LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
@@ -1781,6 +1766,86 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(LD->isUnindexed() && "Indexed VP load during type legalization!");
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed variable-length load offset");
+ Align Alignment = LD->getOriginalAlign();
+ SDValue Mask = LD->getMask();
+ SDValue EVL = LD->getVectorLength();
+ EVT MemoryVT = LD->getMemoryVT();
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ // Split EVL operand
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ LD->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, LD->getAAInfo(), LD->getRanges());
+
+ Lo =
+ DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset,
+ MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad());
+
+ if (HiIsEmpty) {
+ // The hi vp_load has zero storage size. We therefore simply set it to
+ // the low vp_load and rely on subsequent removal from the chain.
+ Hi = Lo;
+ } else {
+ // Generate hi vp_load.
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ LD->isExpandingLoad());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector())
+ MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
+ else
+ MPI = LD->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedSize());
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ LD->getAAInfo(), LD->getRanges());
+
+ Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr,
+ Offset, MaskHi, EVLHi, HiMemVT, MMO,
+ LD->isExpandingLoad());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
@@ -1865,61 +1930,85 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
}
-void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
- SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
+ SDValue &Hi, bool SplitSETCC) {
EVT LoVT, HiVT;
- SDLoc dl(MGT);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
-
- SDValue Ch = MGT->getChain();
- SDValue Ptr = MGT->getBasePtr();
- SDValue Mask = MGT->getMask();
- SDValue PassThru = MGT->getPassThru();
- SDValue Index = MGT->getIndex();
- SDValue Scale = MGT->getScale();
- EVT MemoryVT = MGT->getMemoryVT();
- Align Alignment = MGT->getOriginalAlign();
- ISD::LoadExtType ExtType = MGT->getExtensionType();
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ struct Operands {
+ SDValue Mask;
+ SDValue Index;
+ SDValue Scale;
+ } Ops = [&]() -> Operands {
+ if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
+ return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
+ }
+ auto *VPSC = cast<VPGatherSDNode>(N);
+ return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()};
+ }();
+
+ EVT MemoryVT = N->getMemoryVT();
+ Align Alignment = N->getOriginalAlign();
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (Mask.getOpcode() == ISD::SETCC) {
- SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
} else {
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
}
EVT LoMemVT, HiMemVT;
// Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue PassThruLo, PassThruHi;
- if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(PassThru, PassThruLo, PassThruHi);
- else
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
-
SDValue IndexHi, IndexLo;
- if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Index, IndexLo, IndexHi);
+ if (getTypeAction(Ops.Index.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Ops.Index, IndexLo, IndexHi);
else
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MGT->getPointerInfo(), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
- MGT->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+
+ if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
+ SDValue PassThru = MGT->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ else
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
- SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
- Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
- MMO, MGT->getIndexType(), ExtType);
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
+ ISD::MemIndexType IndexTy = MGT->getIndexType();
- SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
- Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
- MMO, MGT->getIndexType(), ExtType);
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
+ OpsLo, MMO, IndexTy, ExtType);
+
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
+ OpsHi, MMO, IndexTy, ExtType);
+ } else {
+ auto *VPGT = cast<VPGatherSDNode>(N);
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);
+
+ SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+ Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
+ MMO, VPGT->getIndexType());
+
+ SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+ Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
+ MMO, VPGT->getIndexType());
+ }
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1928,10 +2017,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(MGT, 1), Ch);
+ ReplaceValueWith(SDValue(N, 1), Ch);
}
-
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -2221,14 +2309,19 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
+ case ISD::VP_STORE:
+ Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
+ break;
case ISD::MSTORE:
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
break;
case ISD::MSCATTER:
- Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+ case ISD::VP_SCATTER:
+ Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
break;
case ISD::MGATHER:
- Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+ case ISD::VP_GATHER:
+ Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
break;
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
@@ -2285,6 +2378,23 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SEQ_FMUL:
Res = SplitVecOp_VECREDUCE_SEQ(N);
break;
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ Res = SplitVecOp_VP_REDUCE(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2381,6 +2491,33 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ assert(OpNo == 1 && "Can only split reduce vector operand");
+
+ unsigned Opc = N->getOpcode();
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl);
+
+ const SDNodeFlags Flags = N->getFlags();
+
+ SDValue ResLo =
+ DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags);
+ return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
@@ -2558,70 +2695,92 @@ SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
- unsigned OpNo) {
- EVT LoVT, HiVT;
- SDLoc dl(MGT);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
-
- SDValue Ch = MGT->getChain();
- SDValue Ptr = MGT->getBasePtr();
- SDValue Index = MGT->getIndex();
- SDValue Scale = MGT->getScale();
- SDValue Mask = MGT->getMask();
- SDValue PassThru = MGT->getPassThru();
- Align Alignment = MGT->getOriginalAlign();
- ISD::LoadExtType ExtType = MGT->getExtensionType();
+SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
+ (void)OpNo;
+ SDValue Lo, Hi;
+ SplitVecRes_Gather(N, Lo, Hi);
- SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
- EVT MemoryVT = MGT->getMemoryVT();
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed vp_store of vector?");
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Offset = N->getOffset();
+ assert(Offset.isUndef() && "Unexpected VP store offset");
+ SDValue Mask = N->getMask();
+ SDValue EVL = N->getVectorLength();
+ SDValue Data = N->getValue();
+ Align Alignment = N->getOriginalAlign();
+ SDLoc DL(N);
- SDValue PassThruLo, PassThruHi;
- if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
else
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
- SDValue IndexHi, IndexLo;
- if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Index, IndexLo, IndexHi);
- else
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
+ EVT MemoryVT = N->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
+
+ // Split EVL
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL);
+
+ SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MGT->getPointerInfo(), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
- MGT->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
- SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
- OpsLo, MMO, MGT->getIndexType(), ExtType);
+ Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
- SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
- SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
- OpsHi, MMO, MGT->getIndexType(), ExtType);
+ // If the hi vp_store has zero storage size, only the lo vp_store is needed.
+ if (HiIsEmpty)
+ return Lo;
- // Build a factor node to remember that this load is independent of the
- // other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
- // Legalize the chain result - switch anything that used the old chain to
- // use the new one.
- ReplaceValueWith(SDValue(MGT, 1), Ch);
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector()) {
+ Alignment = commonAlignment(Alignment,
+ LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ } else
+ MPI = N->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedSize());
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
- Hi);
- ReplaceValueWith(SDValue(MGT, 0), Res);
- return SDValue();
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
+
+ Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
@@ -2703,64 +2862,87 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
return Res;
}
-SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
- unsigned OpNo) {
- SDValue Ch = N->getChain();
+SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
+ SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- SDValue Mask = N->getMask();
- SDValue Index = N->getIndex();
- SDValue Scale = N->getScale();
- SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
-
+ struct Operands {
+ SDValue Mask;
+ SDValue Index;
+ SDValue Scale;
+ SDValue Data;
+ } Ops = [&]() -> Operands {
+ if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+ return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
+ MSC->getValue()};
+ }
+ auto *VPSC = cast<VPScatterSDNode>(N);
+ return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
+ VPSC->getValue()};
+ }();
// Split all operands
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
- GetSplitVector(Data, DataLo, DataHi);
+ GetSplitVector(Ops.Data, DataLo, DataHi);
else
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
- SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
} else {
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
}
SDValue IndexHi, IndexLo;
- if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Index, IndexLo, IndexHi);
+ if (getTypeAction(Ops.Index.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Ops.Index, IndexLo, IndexHi);
else
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
SDValue Lo;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo(), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
- Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT,
- DL, OpsLo, MMO, N->getIndexType(),
- N->isTruncatingStore());
+ if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+ Lo =
+ DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+ MSC->getIndexType(), MSC->isTruncatingStore());
+
+ // The order of the Scatter operation after split is well defined. The "Hi"
+ // part comes after the "Lo". So these two operations should be chained one
+ // after another.
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
+ MMO, MSC->getIndexType(),
+ MSC->isTruncatingStore());
+ }
+ auto *VPSC = cast<VPScatterSDNode>(N);
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
+
+ SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+ Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+ VPSC->getIndexType());
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
- SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT,
- DL, OpsHi, MMO, N->getIndexType(),
- N->isTruncatingStore());
+ SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
+ VPSC->getIndexType());
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -3047,31 +3229,41 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::VSELECT:
- case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT:
+ case ISD::VP_SELECT:
+ case ISD::VP_MERGE:
+ Res = WidenVecRes_Select(N);
+ break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
+ case ISD::VP_LOAD:
+ Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
+ break;
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
case ISD::MGATHER:
Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
+ case ISD::VP_GATHER:
+ Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
+ break;
- case ISD::ADD:
- case ISD::AND:
- case ISD::MUL:
+ case ISD::ADD: case ISD::VP_ADD:
+ case ISD::AND: case ISD::VP_AND:
+ case ISD::MUL: case ISD::VP_MUL:
case ISD::MULHS:
case ISD::MULHU:
- case ISD::OR:
- case ISD::SUB:
- case ISD::XOR:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
+ case ISD::OR: case ISD::VP_OR:
+ case ISD::SUB: case ISD::VP_SUB:
+ case ISD::XOR: case ISD::VP_XOR:
+ case ISD::SHL: case ISD::VP_SHL:
+ case ISD::SRA: case ISD::VP_ASHR:
+ case ISD::SRL: case ISD::VP_LSHR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINIMUM:
@@ -3088,7 +3280,21 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- Res = WidenVecRes_Binary(N, /*IsVP*/ false);
+ // Vector-predicated binary op widening. Note that -- unlike the
+ // unpredicated versions -- we don't have to worry about trapping on
+ // operations like UDIV, FADD, etc., as we pass on the original vector
+ // length parameter. This means the widened elements containing garbage
+ // aren't active.
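+    // (For instance, a <3 x i32> vp.udiv widened to <4 x i32> keeps its
+    // original EVL, so the padded fourth lane never divides by garbage.)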
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ Res = WidenVecRes_Binary(N);
break;
case ISD::FADD:
@@ -3212,31 +3418,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
- case ISD::VP_ADD:
- case ISD::VP_AND:
- case ISD::VP_MUL:
- case ISD::VP_OR:
- case ISD::VP_SUB:
- case ISD::VP_XOR:
- case ISD::VP_SHL:
- case ISD::VP_LSHR:
- case ISD::VP_ASHR:
- case ISD::VP_SDIV:
- case ISD::VP_UDIV:
- case ISD::VP_SREM:
- case ISD::VP_UREM:
- case ISD::VP_FADD:
- case ISD::VP_FSUB:
- case ISD::VP_FMUL:
- case ISD::VP_FDIV:
- case ISD::VP_FREM:
- // Vector-predicated binary op widening. Note that -- unlike the
- // unpredicated versions -- we don't have to worry about trapping on
- // operations like UDIV, FADD, etc., as we pass on the original vector
- // length parameter. This means the widened elements containing garbage
- // aren't active.
- Res = WidenVecRes_Binary(N, /*IsVP*/ true);
- break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3254,29 +3435,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- if (!IsVP)
+ if (N->getNumOperands() == 2)
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
N->getFlags());
- // For VP operations, we must also widen the mask. Note that the mask type
- // may not actually need widening, leading it be split along with the VP
- // operation.
- // FIXME: This could lead to an infinite split/widen loop. We only handle the
- // case where the mask needs widening to an identically-sized type as the
- // vector inputs.
- SDValue Mask = N->getOperand(2);
- assert(getTypeAction(Mask.getValueType()) ==
- TargetLowering::TypeWidenVector &&
- "Unable to widen binary VP op");
- Mask = GetWidenedVector(Mask);
- assert(Mask.getValueType().getVectorElementCount() ==
- WidenVT.getVectorElementCount() &&
- "Unable to widen binary VP op");
+
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount());
return DAG.getNode(N->getOpcode(), dl, WidenVT,
{InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
@@ -4226,6 +4399,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
report_fatal_error("Unable to widen vector load");
}
+SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue EVL = N->getVectorLength();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+         "Unable to widen VP load");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
+ .getVectorElementCount() &&
+ "Unable to widen vector load");
+
+ SDValue Res =
+ DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(),
+ N->getBasePtr(), N->getOffset(), Mask, EVL,
+ N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -4289,6 +4489,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue Scale = N->getScale();
+ ElementCount WideEC = WideVT.getVectorElementCount();
+ SDLoc dl(N);
+
+ SDValue Index = GetWidenedVector(N->getIndex());
+ EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getMemoryVT().getScalarType(), WideEC);
+ Mask = GetWidenedMask(Mask, WideEC);
+
+ SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale,
+ Mask, N->getVectorLength()};
+ SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT,
+ dl, Ops, N->getMemOperand(), N->getIndexType());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
@@ -4522,19 +4745,19 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
return Mask;
}
-SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
+ unsigned Opcode = N->getOpcode();
if (CondVT.isVector()) {
if (SDValue WideCond = WidenVSELECTMask(N)) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- WidenVT, WideCond, InOp1, InOp2);
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, WideCond, InOp1, InOp2);
}
EVT CondEltVT = CondVT.getVectorElementType();
@@ -4560,8 +4783,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- WidenVT, Cond1, InOp1, InOp2);
+ return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
+ ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
+ N->getOperand(3))
+ : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
@@ -4711,9 +4936,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
+ case ISD::VP_SCATTER: Res = WidenVecOp_VP_SCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
@@ -4766,6 +4993,23 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SEQ_FMUL:
Res = WidenVecOp_VECREDUCE_SEQ(N);
break;
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ Res = WidenVecOp_VP_REDUCE(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -5092,15 +5336,54 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
unsigned NumVTElts = StVT.getVectorMinNumElements();
SDValue EVL =
DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
- const auto *MMO = ST->getMemOperand();
- return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
- EVL, MMO->getPointerInfo(), MMO->getAlign(),
- MMO->getFlags(), MMO->getAAInfo());
+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
+ DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
+ EVL, StVal.getValueType(), ST->getMemOperand(),
+ ST->getAddressingMode());
}
report_fatal_error("Unable to widen vector store");
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 3) &&
+ "Can widen only data or mask operand of vp_store");
+ VPStoreSDNode *ST = cast<VPStoreSDNode>(N);
+ SDValue Mask = ST->getMask();
+ SDValue StVal = ST->getValue();
+ SDLoc dl(N);
+
+ if (OpNo == 1) {
+ // Widen the value.
+ StVal = GetWidenedVector(StVal);
+
+ // We only handle the case where the mask needs widening to an
+ // identically-sized type as the vector inputs.
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP store");
+ Mask = GetWidenedVector(Mask);
+ } else {
+ Mask = GetWidenedVector(Mask);
+
+ // We only handle the case where the stored value needs widening to an
+ // identically-sized type as the mask.
+ assert(getTypeAction(StVal.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP store");
+ StVal = GetWidenedVector(StVal);
+ }
+
+ assert(Mask.getValueType().getVectorElementCount() ==
+ StVal.getValueType().getVectorElementCount() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(),
+ ST->getOffset(), Mask, ST->getVectorLength(),
+ ST->getMemoryVT(), ST->getMemOperand(),
+ ST->getAddressingMode(), ST->isTruncatingStore(),
+ ST->isCompressingStore());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of mstore");
@@ -5202,6 +5485,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
MSC->isTruncatingStore());
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
+ VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N);
+ SDValue DataOp = VPSC->getValue();
+ SDValue Mask = VPSC->getMask();
+ SDValue Index = VPSC->getIndex();
+ SDValue Scale = VPSC->getScale();
+ EVT WideMemVT = VPSC->getMemoryVT();
+
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ Index = GetWidenedVector(Index);
+ const auto WideEC = DataOp.getValueType().getVectorElementCount();
+ Mask = GetWidenedMask(Mask, WideEC);
+ WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ VPSC->getMemoryVT().getScalarType(), WideEC);
+ } else if (OpNo == 4) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+    llvm_unreachable("Can't widen this operand of vp_scatter");
+
+ SDValue Ops[] = {
+ VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
+ VPSC->getVectorLength()};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops,
+ VPSC->getMemOperand(), VPSC->getIndexType());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -5320,6 +5631,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(1));
+ SDValue Mask = GetWidenedMask(N->getOperand(2),
+ Op.getValueType().getVectorElementCount());
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0),
+ {N->getOperand(0), Op, Mask, N->getOperand(3)},
+ N->getFlags());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// This only gets called in the case that the left and right inputs and
// result are of a legal odd vector type, and the condition is illegal i1 of
@@ -5779,6 +6103,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
+ assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ "cannot modify scalable vectors in this way");
SDLoc dl(InOp);
// Check if InOp already has the right width.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index aec2cf38b400..403f34573899 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -286,7 +286,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
// Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensure they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- SDValue InGlue = SDValue(nullptr, 0);
+ SDValue InGlue;
if (AddGlue(Lead, InGlue, true, DAG))
InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
@@ -1057,12 +1057,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
"first terminator cannot be a debug value");
for (MachineInstr &MI : make_early_inc_range(
make_range(std::next(FirstTerm), InsertBB->end()))) {
+ // Only scan up to insertion point.
+ if (&MI == InsertPos)
+ break;
+
if (!MI.isDebugValue())
continue;
- if (&MI == InsertPos)
- InsertPos = std::prev(InsertPos->getIterator());
-
// The DBG_VALUE was referencing a value produced by a terminator. By
// moving the DBG_VALUE, the referenced value also needs invalidating.
MI.getOperand(0).ChangeToRegister(0, false);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2ae0d4df7b77..45f3005e8f57 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -373,31 +373,46 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
llvm_unreachable("Expected VECREDUCE opcode");
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
return ISD::FADD;
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
return ISD::FMUL;
case ISD::VECREDUCE_ADD:
+ case ISD::VP_REDUCE_ADD:
return ISD::ADD;
case ISD::VECREDUCE_MUL:
+ case ISD::VP_REDUCE_MUL:
return ISD::MUL;
case ISD::VECREDUCE_AND:
+ case ISD::VP_REDUCE_AND:
return ISD::AND;
case ISD::VECREDUCE_OR:
+ case ISD::VP_REDUCE_OR:
return ISD::OR;
case ISD::VECREDUCE_XOR:
+ case ISD::VP_REDUCE_XOR:
return ISD::XOR;
case ISD::VECREDUCE_SMAX:
+ case ISD::VP_REDUCE_SMAX:
return ISD::SMAX;
case ISD::VECREDUCE_SMIN:
+ case ISD::VP_REDUCE_SMIN:
return ISD::SMIN;
case ISD::VECREDUCE_UMAX:
+ case ISD::VP_REDUCE_UMAX:
return ISD::UMAX;
case ISD::VECREDUCE_UMIN:
+ case ISD::VP_REDUCE_UMIN:
return ISD::UMIN;
case ISD::VECREDUCE_FMAX:
+ case ISD::VP_REDUCE_FMAX:
return ISD::FMAXNUM;
case ISD::VECREDUCE_FMIN:
+ case ISD::VP_REDUCE_FMIN:
return ISD::FMINNUM;
}
}
@@ -3066,7 +3081,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = KnownBits::mul(Known, Known2);
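+    // Squares have extra known bits (e.g. bit 1 of X * X is always zero),
+    // so tell KnownBits::mul when both operands are the same node.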
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
break;
}
case ISD::MULHU: {
@@ -3085,8 +3101,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
if (Op.getResNo() == 0)
- Known = KnownBits::mul(Known, Known2);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
else
Known = KnownBits::mulhu(Known, Known2);
break;
@@ -3095,8 +3112,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
if (Op.getResNo() == 0)
- Known = KnownBits::mul(Known, Known2);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
else
Known = KnownBits::mulhs(Known, Known2);
break;
@@ -3363,6 +3381,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::AssertAlign: {
unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
assert(LogOfAlign != 0);
+
+    // TODO: Should combine this with the known bits of the source operand.
// If a node is guaranteed to be aligned, set low zero bits accordingly as
// well as clearing one bits.
Known.Zero.setLowBits(LogOfAlign);
@@ -3584,6 +3604,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::smin(Known, Known2);
break;
}
+ case ISD::FP_TO_UINT_SAT: {
+ // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
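+    // e.g. a saturating conversion to i8 computed in an i32 result leaves
+    // bits 8..31 known zero.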
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() == 1) {
// The boolean result conforms to getBooleanContents.
@@ -3860,6 +3886,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
}
+ case ISD::FP_TO_SINT_SAT:
+ // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT.
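+    // e.g. saturating to i8 in an i32 result guarantees at least
+    // 32 - 8 + 1 == 25 sign bits.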
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
case ISD::SIGN_EXTEND:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
@@ -4252,7 +4282,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// scalar cases.
Type *CstTy = Cst->getType();
if (CstTy->isVectorTy() &&
- (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+ (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() &&
+ VTBits == CstTy->getScalarSizeInBits()) {
Tmp = VTBits;
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
@@ -4294,31 +4325,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
KnownBits Known = computeKnownBits(Op, DemandedElts, Depth);
-
- APInt Mask;
- if (Known.isNonNegative()) { // sign bit is 0
- Mask = Known.Zero;
- } else if (Known.isNegative()) { // sign bit is 1;
- Mask = Known.One;
- } else {
- // Nothing known.
- return FirstAnswer;
- }
-
- // Okay, we know that the sign bit in Mask is set. Use CLO to determine
- // the number of identical bits in the top of the input value.
- Mask <<= Mask.getBitWidth()-VTBits;
- return std::max(FirstAnswer, Mask.countLeadingOnes());
+ return std::max(FirstAnswer, Known.countMinSignBits());
}
-unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const {
+unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op,
+ unsigned Depth) const {
unsigned SignBits = ComputeNumSignBits(Op, Depth);
return Op.getScalarValueSizeInBits() - SignBits + 1;
}
-unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op,
- const APInt &DemandedElts,
- unsigned Depth) const {
+unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);
return Op.getScalarValueSizeInBits() - SignBits + 1;
}
@@ -5102,6 +5120,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"BSWAP types must be a multiple of 16 bits!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ // bswap(bswap(X)) -> X.
+ if (OpOpcode == ISD::BSWAP)
+ return Operand.getOperand(0);
break;
case ISD::BITREVERSE:
assert(VT.isInteger() && VT == Operand.getValueType() &&
@@ -5398,6 +5419,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
}
+ // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
+ // (shl step_vector(C0), C1) -> (step_vector(C0 << C1))
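+  // e.g. (mul step_vector(2), splat(3)) -> step_vector(6),
+  // i.e. <0, 6, 12, ...>.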
+ if ((Opcode == ISD::MUL || Opcode == ISD::SHL) &&
+ Ops[0].getOpcode() == ISD::STEP_VECTOR) {
+ APInt RHSVal;
+ if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) {
+ APInt NewStep = Opcode == ISD::MUL
+ ? Ops[0].getConstantOperandAPInt(0) * RHSVal
+ : Ops[0].getConstantOperandAPInt(0) << RHSVal;
+ return getStepVector(DL, VT, NewStep);
+ }
+ }
+
auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorElementCount() == NumElts;
@@ -5595,22 +5629,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(N1.getOpcode() != ISD::DELETED_NODE &&
N2.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
-
// Canonicalize constant to RHS if commutative.
if (TLI->isCommutativeBinOp(Opcode)) {
- if (N1C && !N2C) {
- std::swap(N1C, N2C);
+ bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
+ bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
+ bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+ if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
std::swap(N1, N2);
- } else if (N1CFP && !N2CFP) {
- std::swap(N1CFP, N2CFP);
- std::swap(N1, N2);
- }
}
+ auto *N1C = dyn_cast<ConstantSDNode>(N1);
+ auto *N2C = dyn_cast<ConstantSDNode>(N2);
+
+ // Don't allow undefs in vector splats - we might be returning N2 when folding
+ // to zero etc.
+ ConstantSDNode *N2CV =
+ isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
+
switch (Opcode) {
default: break;
case ISD::TokenFactor:
@@ -5640,9 +5676,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N2;
- if (N2C && N2C->isAllOnes()) // X & -1 -> X
+ if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
@@ -5654,7 +5690,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
@@ -5760,7 +5796,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N1;
break;
case ISD::FP_ROUND:
@@ -6358,7 +6394,7 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
return DAG.getConstant(Val, dl, VT);
- return SDValue(nullptr, 0);
+ return SDValue();
}
SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
@@ -7697,23 +7733,6 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
SDValue Offset, SDValue Mask, SDValue EVL,
EVT MemVT, MachineMemOperand *MMO,
bool IsExpanding) {
- if (VT == MemVT) {
- ExtType = ISD::NON_EXTLOAD;
- } else if (ExtType == ISD::NON_EXTLOAD) {
- assert(VT == MemVT && "Non-extending load from different memory type!");
- } else {
- // Extending load.
- assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
- "Should only be an extending load, not truncating!");
- assert(VT.isInteger() == MemVT.isInteger() &&
- "Cannot convert from FP to Int or Int -> FP!");
- assert(VT.isVector() == MemVT.isVector() &&
- "Cannot use an ext load to convert to or from a vector!");
- assert((!VT.isVector() ||
- VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
- "Cannot use an ext load to change the number of vector elements!");
- }
-
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
@@ -7802,48 +7821,29 @@ SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
}
SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
- SDValue Ptr, SDValue Mask, SDValue EVL,
- MachinePointerInfo PtrInfo, Align Alignment,
- MachineMemOperand::Flags MMOFlags,
- const AAMDNodes &AAInfo, bool IsCompressing) {
+ SDValue Ptr, SDValue Offset, SDValue Mask,
+ SDValue EVL, EVT MemVT, MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM, bool IsTruncating,
+ bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
- MMOFlags |= MachineMemOperand::MOStore;
- assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
-
- if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
-
- MachineFunction &MF = getMachineFunction();
- uint64_t Size =
- MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
- return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
-}
-
-SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
- SDValue Ptr, SDValue Mask, SDValue EVL,
- MachineMemOperand *MMO, bool IsCompressing) {
- assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
- EVT VT = Val.getValueType();
- SDVTList VTs = getVTList(MVT::Other);
- SDValue Undef = getUNDEF(Ptr.getValueType());
- SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
- dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));
+ dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- auto *N =
- newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- ISD::UNINDEXED, false, IsCompressing, VT, MMO);
+ auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -7885,7 +7885,9 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
if (VT == SVT)
- return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+ return getStoreVP(Chain, dl, Val, Ptr, getUNDEF(Ptr.getValueType()), Mask,
+ EVL, VT, MMO, ISD::UNINDEXED,
+ /*IsTruncating*/ false, IsCompressing);
assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
"Should only be a truncating store, not extending!");
@@ -10661,6 +10663,23 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
return std::make_pair(Lo, Hi);
}
+std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT,
+ const SDLoc &DL) {
+ // Split the vector length parameter.
+  // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl, %halfnumelts).
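+  // e.g. splitting v8i32 with %evl = 5 gives Lo = umin(5, 4) = 4 and
+  // Hi = usubsat(5, 4) = 1.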
+ EVT VT = N.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+         "Expecting the vector being split to have an even element count");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? getConstant(HalfMinNumElts, DL, VT)
+ : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts);
+ SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts);
+ return std::make_pair(Lo, Hi);
+}
+
/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
EVT VT = N.getValueType();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 63cd723cf6da..41460f78e1c2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1683,6 +1683,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
}
+ if (const auto *BB = dyn_cast<BasicBlock>(V))
+ return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
llvm_unreachable("Can't get register for value!");
}
@@ -4846,10 +4848,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
if (!I.getType()->isVoidTy()) {
- if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
- EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
- Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
- } else
+ if (!isa<VectorType>(I.getType()))
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
@@ -7327,8 +7326,6 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
@@ -7336,6 +7333,8 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
if (!IsGather) {
// Do not serialize variable-length loads of constant memory with
// anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
@@ -7345,6 +7344,8 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
} else {
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
@@ -7385,18 +7386,22 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
if (!IsScatter) {
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ SDValue Ptr = OpValues[1];
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
- ST =
- DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
- OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
+ ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
+ OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
+ /* IsTruncating */ false, /*IsCompressing*/ false);
} else {
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
@@ -8250,7 +8255,8 @@ public:
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL) const {
+ const DataLayout &DL,
+ llvm::Type *ParamElemType) const {
if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
@@ -8262,10 +8268,8 @@ public:
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
- PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
- if (!PtrTy)
- report_fatal_error("Indirect operand for inline asm not a pointer!");
- OpTy = PtrTy->getElementType();
+ OpTy = ParamElemType;
+      assert(OpTy && "Indirect operand must have elementtype attribute");
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
@@ -8559,37 +8563,19 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- unsigned NumMatchingOps = 0;
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
- if (OpInfo.Type == InlineAsm::isInput ||
- (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
- OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
-
- // Process the call argument. BasicBlocks are labels, currently appearing
- // only in asm's.
- if (isa<CallBrInst>(Call) &&
- ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -
- cast<CallBrInst>(&Call)->getNumIndirectDests() -
- NumMatchingOps) &&
- (NumMatchingOps == 0 ||
- ArgNo - 1 <
- (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {
- const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
- EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
- OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
- } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
- OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
- } else {
- OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- }
-
+ if (OpInfo.hasArg()) {
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo);
EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
- DAG.getDataLayout());
+ DAG.getDataLayout(), ParamElemTy);
OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
+ ArgNo++;
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
@@ -8607,9 +8593,6 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.ConstraintVT = MVT::Other;
}
- if (OpInfo.hasMatchingInput())
- ++NumMatchingOps;
-
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
@@ -11246,12 +11229,6 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
unsigned NumElts = VT.getVectorNumElements();
- if ((-Imm > NumElts) || (Imm >= NumElts)) {
- // Result is undefined if immediate is out-of-bounds.
- setValue(&I, DAG.getUNDEF(VT));
- return;
- }
-
uint64_t Idx = (NumElts + Imm) % NumElts;
// Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e6b06ab93d6b..a98c21f16c71 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -60,7 +60,7 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore following attributes because they don't affect the
// call sequence.
- AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex);
+ AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
Attribute::NonNull})
@@ -1806,6 +1806,31 @@ bool TargetLowering::SimplifyDemandedBits(
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
+
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedBits.countLeadingZeros();
+ unsigned NTZ = DemandedBits.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ = alignDown(NLZ, 8);
+ NTZ = alignDown(NTZ, 8);
+ // If we need exactly one byte, we can do this transformation.
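+    // e.g. an i32 bswap where DemandedBits == 0x0000FF00 has NLZ = 16 and
+    // NTZ = 8, so the bswap can be replaced by (srl Src, 8).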
+ if (BitWidth - NLZ - NTZ == 8) {
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
+ if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
+ unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
+ SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
+ SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
@@ -1833,19 +1858,15 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
unsigned MinSignedBits =
- TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);
+ TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
- EVT ShiftAmtTy = VT;
- if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
- ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
-
- SDValue ShiftAmt =
- TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
+ getShiftAmountTy(VT, DL));
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
@@ -3233,17 +3254,29 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
DAGCombinerInfo &DCI) const {
- // Match these patterns in any of their permutations:
- // (X & Y) == Y
- // (X & Y) != Y
if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
std::swap(N0, N1);
+ SelectionDAG &DAG = DCI.DAG;
EVT OpVT = N0.getValueType();
if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
(Cond != ISD::SETEQ && Cond != ISD::SETNE))
return SDValue();
+ // (X & Y) != 0 --> zextOrTrunc(X & Y)
+ // iff everything but LSB is known zero:
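+  // e.g. with an i1 result type, (setne (and X, 1), 0) becomes
+  // (trunc (and X, 1)).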
+ if (Cond == ISD::SETNE && isNullConstant(N1) &&
+ (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
+ getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
+ unsigned NumEltBits = OpVT.getScalarSizeInBits();
+ APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
+ if (DAG.MaskedValueIsZero(N0, UpperBits))
+ return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
+ }
+
+ // Match these patterns in any of their permutations:
+ // (X & Y) == Y
+ // (X & Y) != Y
SDValue X, Y;
if (N0.getOperand(0) == N1) {
X = N0.getOperand(1);
@@ -3255,7 +3288,6 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
- SelectionDAG &DAG = DCI.DAG;
SDValue Zero = DAG.getConstant(0, DL, OpVT);
if (DAG.isKnownToBeAPowerOfTwo(Y)) {
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
@@ -3678,9 +3710,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Figure out how many bits we need to preserve this constant.
- unsigned ReqdBits = Signed ?
- C1.getBitWidth() - C1.getNumSignBits() + 1 :
- C1.getActiveBits();
+ unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
@@ -4594,20 +4624,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
- case 'X': // Allows any operand; labels (basic block) use this.
- if (Op.getOpcode() == ISD::BasicBlock ||
- Op.getOpcode() == ISD::TargetBlockAddress) {
- Ops.push_back(Op);
- return;
- }
- LLVM_FALLTHROUGH;
+ case 'X': // Allows any operand
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
- GlobalAddressSDNode *GA;
ConstantSDNode *C;
- BlockAddressSDNode *BA;
uint64_t Offset = 0;
// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
@@ -4615,13 +4637,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
// while in this case the GA may be furthest from the root node which is
// likely an ISD::ADD.
- while (1) {
- if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
- GA->getValueType(0),
- Offset + GA->getOffset()));
- return;
- }
+ while (true) {
if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
@@ -4636,11 +4652,23 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
return;
}
- if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') {
- Ops.push_back(DAG.getTargetBlockAddress(
- BA->getBlockAddress(), BA->getValueType(0),
- Offset + BA->getOffset(), BA->getTargetFlags()));
- return;
+ if (ConstraintLetter != 'n') {
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+ GA->getValueType(0),
+ Offset + GA->getOffset()));
+ return;
+ }
+ if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ Ops.push_back(DAG.getTargetBlockAddress(
+ BA->getBlockAddress(), BA->getValueType(0),
+ Offset + BA->getOffset(), BA->getTargetFlags()));
+ return;
+ }
+ if (isa<BasicBlockSDNode>(Op)) {
+ Ops.push_back(Op);
+ return;
+ }
}
const unsigned OpCode = Op.getOpcode();
if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
@@ -4753,7 +4781,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case InlineAsm::isOutput:
// Indirect outputs just consume an argument.
if (OpInfo.isIndirect) {
- OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
break;
}
@@ -4771,7 +4799,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
++ResNo;
break;
case InlineAsm::isInput:
- OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
break;
case InlineAsm::isClobber:
// Nothing to do.
@@ -4781,10 +4809,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
if (OpInfo.CallOperandVal) {
llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
if (OpInfo.isIndirect) {
- llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
- if (!PtrTy)
- report_fatal_error("Indirect operand for inline asm not a pointer!");
- OpTy = PtrTy->getElementType();
+ OpTy = Call.getAttributes().getParamElementType(ArgNo);
+        assert(OpTy && "Indirect operand must have elementtype attribute");
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
@@ -4814,6 +4840,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
+
+ ArgNo++;
}
}
@@ -5087,17 +5115,18 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
- // Labels and constants are handled elsewhere ('X' is the only thing
- // that matches labels). For Functions, the type here is the type of
- // the result, which is not what we want to look at; leave them alone.
+ // Constants are handled elsewhere. For Functions, the type here is the
+ // type of the result, which is not what we want to look at; leave them
+ // alone.
Value *v = OpInfo.CallOperandVal;
- if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
- OpInfo.CallOperandVal = v;
+ if (isa<ConstantInt>(v) || isa<Function>(v)) {
return;
}
- if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
+ if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
+ OpInfo.ConstraintCode = "i";
return;
+ }
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
@@ -6438,12 +6467,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
unsigned ShiftAmount = OuterBitSize - InnerBitSize;
EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
- if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
- // FIXME getShiftAmountTy does not always return a sensible result when VT
- // is an illegal type, and so the type may be too small to fit the shift
- // amount. Override it with i32. The shift will have to be legalized.
- ShiftAmountTy = MVT::i32;
- }
SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
if (!LH.getNode() && !RH.getNode() &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index f89069e9f728..f6ad2b50abcd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -273,6 +273,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
return true;
}
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
for (const MachineOperand &MO : MI.operands()) {
bool UseOrDefCSR = false;
if (MO.isReg()) {
@@ -288,8 +290,14 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
// separately. An SP mentioned by a call instruction, we can ignore,
// though, as it's harmless and we do not want to effectively disable tail
// calls by forcing the restore point to post-dominate them.
- UseOrDefCSR = (!MI.isCall() && PhysReg == SP) ||
- RCI.getLastCalleeSavedAlias(PhysReg);
+ // PPC's LR is also not normally described as a callee-saved register in
+ // calling convention definitions, so we need to watch for it, too. An LR
+ // mentioned implicitly by a return (or "branch to link register")
+ // instruction we can ignore, otherwise we may pessimize shrinkwrapping.
+ UseOrDefCSR =
+ (!MI.isCall() && PhysReg == SP) ||
+ RCI.getLastCalleeSavedAlias(PhysReg) ||
+ (!MI.isReturn() && TRI->isNonallocatableRegisterCalleeSave(PhysReg));
} else if (MO.isRegMask()) {
// Check if this regmask clobbers any of the CSRs.
for (unsigned Reg : getCurrentCSRs(RS)) {
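
Pulled out of the loop, the register test now reads as the following helper (a sketch only; the name and factoring are illustrative, not part of ShrinkWrap.cpp):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

static bool useOrDefIsCSRLike(const MachineInstr &MI, MCRegister PhysReg,
                              MCRegister SP, const RegisterClassInfo &RCI,
                              const TargetRegisterInfo &TRI) {
  // The stack pointer matters unless it is only mentioned by a call; counting
  // calls would force the restore point to post-dominate tail calls.
  if (!MI.isCall() && PhysReg == SP)
    return true;
  // Any alias of a callee-saved register counts.
  if (RCI.getLastCalleeSavedAlias(PhysReg))
    return true;
  // Registers like PPC's LR are callee-saved in practice but not listed in
  // the calling convention; ignore them only when mentioned by a return.
  return !MI.isReturn() && TRI.isNonallocatableRegisterCalleeSave(PhysReg);
}
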
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f0d342d26cc4..f69e50eaa0ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -715,6 +715,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
SchedPreferenceInfo = Sched::ILP;
GatherAllAliasesMaxDepth = 18;
IsStrictFPEnabled = DisableStrictNodeMutation;
+ MaxBytesForAlignment = 0;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -2040,6 +2041,11 @@ Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
return PrefLoopAlignment;
}
+unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment(
+ MachineBasicBlock *MBB) const {
+ return MaxBytesForAlignment;
+}
+
//===----------------------------------------------------------------------===//
// Reciprocal Estimates
//===----------------------------------------------------------------------===//
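
The new hook gives each target a per-block override point for capping alignment padding. A hypothetical override, assuming an LLVM build ("MyTargetLowering" is not a real target and is shown only to illustrate the hook):

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}

  // Never let block alignment insert more than 8 bytes of padding, regardless
  // of the preferred loop or block alignment.
  unsigned
  getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const override {
    return 8;
  }
};
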
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d1c2cdeb133b..ce350034d073 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -108,8 +108,7 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
// ELF
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
- : TargetLoweringObjectFile() {
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() {
SupportDSOLocalEquivalentLowering = true;
}
@@ -478,6 +477,11 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
return K;
}
+static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
+ return SectionName.consume_front(Prefix) &&
+ (SectionName.empty() || SectionName[0] == '.');
+}
+
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
// Use SHT_NOTE for sections whose names start with ".note" to allow
// emitting ELF notes from C variable declarations.
@@ -485,13 +489,13 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
if (Name.startswith(".note"))
return ELF::SHT_NOTE;
- if (Name == ".init_array")
+ if (hasPrefix(Name, ".init_array"))
return ELF::SHT_INIT_ARRAY;
- if (Name == ".fini_array")
+ if (hasPrefix(Name, ".fini_array"))
return ELF::SHT_FINI_ARRAY;
- if (Name == ".preinit_array")
+ if (hasPrefix(Name, ".preinit_array"))
return ELF::SHT_PREINIT_ARRAY;
if (K.isBSS() || K.isThreadBSS())
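
With hasPrefix, a section qualifies either by exact name or with a '.'-separated suffix such as an init priority. A standalone illustration, assuming an LLVM build:

#include "llvm/ADT/StringRef.h"
#include <cassert>
using llvm::StringRef;

static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
  return SectionName.consume_front(Prefix) &&
         (SectionName.empty() || SectionName[0] == '.');
}

int main() {
  assert(hasPrefix(".init_array", ".init_array"));      // exact name
  assert(hasPrefix(".init_array.100", ".init_array"));  // priority suffix
  assert(!hasPrefix(".init_array2", ".init_array"));    // unrelated section
  return 0;
}
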
@@ -1139,8 +1143,7 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
- : TargetLoweringObjectFile() {
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() {
SupportIndirectSymViaGOTPCRel = true;
}
@@ -1185,6 +1188,7 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
StringRef SectionVal;
GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal);
+ emitCGProfileMetadata(Streamer, M);
// The section is mandatory. If we don't have it, then we don't have GC info.
if (SectionVal.empty())
@@ -2543,8 +2547,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
//===----------------------------------------------------------------------===//
// GOFF
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF()
- : TargetLoweringObjectFile() {}
+TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {}
MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index 402e21d3708b..05004fb935df 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -328,7 +328,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
// Find the FSProfile file name. The internal option takes precedence
// over the value from TargetMachine.
-static const std::string getFSProfileFile(const TargetMachine *TM) {
+static std::string getFSProfileFile(const TargetMachine *TM) {
if (!FSProfileFile.empty())
return FSProfileFile.getValue();
const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
@@ -339,7 +339,7 @@ static const std::string getFSProfileFile(const TargetMachine *TM) {
// Find the Profile remapping file name. The internal option takes
// precedence over the value from TargetMachine.
-static const std::string getFSRemappingFile(const TargetMachine *TM) {
+static std::string getFSRemappingFile(const TargetMachine *TM) {
if (!FSRemappingFile.empty())
return FSRemappingFile.getValue();
const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
@@ -1399,6 +1399,9 @@ bool TargetPassConfig::addRegAssignAndRewriteOptimized() {
// Finally rewrite virtual registers.
addPass(&VirtRegRewriterID);
+ // Regalloc scoring for ML-driven eviction - noop except when learning a new
+ // eviction policy.
+ addPass(createRegAllocScoringPass());
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index f5cb518fce3e..6bcf79547056 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -552,7 +552,7 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes(
// Abort if we cannot possibly implement the COPY with the given indexes.
if (BestIdx == 0)
- return 0;
+ return false;
NeededIndexes.push_back(BestIdx);
@@ -581,7 +581,7 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes(
}
if (BestIdx == 0)
- return 0; // Impossible to handle
+ return false; // Impossible to handle
NeededIndexes.push_back(BestIdx);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index d042deefd746..01ea171e5ea2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -116,11 +116,11 @@ class IRPromoter {
SmallPtrSet<Value*, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
- void ExtendSources(void);
- void ConvertTruncs(void);
- void PromoteTree(void);
- void TruncateSinks(void);
- void Cleanup(void);
+ void ExtendSources();
+ void ConvertTruncs();
+ void PromoteTree();
+ void TruncateSinks();
+ void Cleanup();
public:
IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index cbc5d9ec169b..5f59cb4643f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -293,7 +293,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
const std::vector<unsigned> &MaxPressure =
DAG->getRegPressure().MaxSetPressure;
- HighPressureSets.assign(MaxPressure.size(), 0);
+ HighPressureSets.assign(MaxPressure.size(), false);
for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
HighPressureSets[i] =