author     Dimitry Andric <dim@FreeBSD.org>    2022-03-20 11:40:34 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2022-06-04 11:58:51 +0000
commit     4b6eb0e63c698094db5506763df44cc83c19f643 (patch)
tree       f1d30b8c10bc6db323b91538745ae8ab8b593910 /contrib/llvm-project/llvm/lib/CodeGen
parent     76886853f03395abb680824bcc74e98f83bd477a (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 129
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 172
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 179
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 37
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp | 169
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 92
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 74
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp | 138
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 54
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1792
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 366
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 851
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 669
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 403
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 98
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 2913
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 1051
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 247
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp | 43
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 60
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 86
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp | 343
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 76
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 53
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 167
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 101
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 132
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 946
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 498
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 80
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 422
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 924
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 414
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 203
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 182
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 471
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 148
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 71
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 130
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 256
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp | 152
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 4
161 files changed, 11491 insertions, 5922 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index e5d576d879b5..7d8a73e12d3a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -221,9 +221,6 @@ ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
}
}
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
switch (Pred) {
case ICmpInst::ICMP_EQ: return ISD::SETEQ;
@@ -241,6 +238,33 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
+ switch (Pred) {
+ case ISD::SETEQ:
+ return ICmpInst::ICMP_EQ;
+ case ISD::SETNE:
+ return ICmpInst::ICMP_NE;
+ case ISD::SETLE:
+ return ICmpInst::ICMP_SLE;
+ case ISD::SETULE:
+ return ICmpInst::ICMP_ULE;
+ case ISD::SETGE:
+ return ICmpInst::ICMP_SGE;
+ case ISD::SETUGE:
+ return ICmpInst::ICMP_UGE;
+ case ISD::SETLT:
+ return ICmpInst::ICMP_SLT;
+ case ISD::SETULT:
+ return ICmpInst::ICMP_ULT;
+ case ISD::SETGT:
+ return ICmpInst::ICMP_SGT;
+ case ISD::SETUGT:
+ return ICmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("Invalid ISD integer condition code!");
+ }
+}
+
static bool isNoopBitcast(Type *T1, Type *T2,
const TargetLoweringBase& TLI) {
return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
@@ -524,10 +548,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
if (&*BBI == &Call)
break;
// Debug info intrinsics do not get in the way of tail call optimization.
- if (isa<DbgInfoIntrinsic>(BBI))
- continue;
// Pseudo probe intrinsics do not block tail call optimization either.
- if (isa<PseudoProbeInst>(BBI))
+ if (BBI->isDebugOrPseudoInst())
continue;
// A lifetime end, assume or noalias.decl intrinsic should not stop tail
// call optimization.
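
Aside: the first hunk above adds the reverse mapping from ISD integer condition codes back to IR predicates. The following is a minimal round-trip sketch, assuming both getICmpCondCode overloads are declared in llvm/CodeGen/Analysis.h as in upstream LLVM and that an LLVM tree is available to build and link against:

```cpp
// Round-trip check for the new ISD::CondCode -> ICmpInst::Predicate mapping.
#include "llvm/CodeGen/Analysis.h"     // declares both getICmpCondCode overloads
#include "llvm/CodeGen/ISDOpcodes.h"   // ISD::CondCode
#include "llvm/IR/Instructions.h"      // ICmpInst
#include <cassert>

int main() {
  using namespace llvm;
  for (ICmpInst::Predicate Pred :
       {ICmpInst::ICMP_EQ, ICmpInst::ICMP_NE, ICmpInst::ICMP_SLT,
        ICmpInst::ICMP_ULT, ICmpInst::ICMP_SGT, ICmpInst::ICMP_UGT,
        ICmpInst::ICMP_SLE, ICmpInst::ICMP_ULE, ICmpInst::ICMP_SGE,
        ICmpInst::ICMP_UGE}) {
    ISD::CondCode CC = getICmpCondCode(Pred);            // IR -> ISD (existing)
    ICmpInst::Predicate RoundTrip = getICmpCondCode(CC); // ISD -> IR (new)
    assert(RoundTrip == Pred && "integer predicates should round-trip");
    (void)RoundTrip;
  }
  return 0;
}
```
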
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index db4215e92d44..223840c21d8b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -75,7 +75,6 @@ void ARMException::endFunction(const MachineFunction *MF) {
// Emit references to personality.
if (Per) {
MCSymbol *PerSym = Asm->getSymbol(Per);
- Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global);
ATS.emitPersonality(PerSym);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e528d33b5f8c..cc848d28a9a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -71,7 +71,6 @@
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -102,6 +101,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
@@ -115,7 +115,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -275,7 +274,7 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.getModuleMetadata(M);
- OutStreamer->InitSections(false);
+ OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
if (DisableDebugInfoPrinting)
MMI->setDebugInfoAvailability(false);
@@ -326,16 +325,10 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
- // We're at the module level. Construct MCSubtarget from the default CPU
- // and target triple.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple().str(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
- assert(STI && "Unable to create subtarget info");
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
- emitInlineAsm(M.getModuleInlineAsm() + "\n",
- OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
+ emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+ TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->AddBlankLine();
}
@@ -1422,7 +1415,7 @@ void AsmPrinter::emitFunctionBody() {
});
R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
for (auto &KV : MnemonicVec) {
- auto Name = (Twine("INST_") + KV.first.trim()).str();
+ auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str();
R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
}
ORE->emit(R);
@@ -1610,14 +1603,13 @@ void AsmPrinter::emitGlobalGOTEquivs() {
emitGlobalVariable(GV);
}
-void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
- const GlobalIndirectSymbol& GIS) {
- MCSymbol *Name = getSymbol(&GIS);
- bool IsFunction = GIS.getValueType()->isFunctionTy();
+void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+ MCSymbol *Name = getSymbol(&GA);
+ bool IsFunction = GA.getValueType()->isFunctionTy();
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
- if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+ if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee()))
if (CE->getOpcode() == Instruction::BitCast)
IsFunction =
CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
@@ -1627,61 +1619,80 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
// point, all the extra label is emitted, we just have to emit linkage for
// those labels.
if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
- assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX.");
assert(MAI->hasVisibilityOnlyWithLinkage() &&
"Visibility should be handled with emitLinkage() on AIX.");
- emitLinkage(&GIS, Name);
+ emitLinkage(&GA, Name);
// If it's a function, also emit linkage for aliases of function entry
// point.
if (IsFunction)
- emitLinkage(&GIS,
- getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM));
+ emitLinkage(&GA,
+ getObjFileLowering().getFunctionEntryPointSymbol(&GA, TM));
return;
}
- if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ if (GA.hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
- else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ else if (GA.hasWeakLinkage() || GA.hasLinkOnceLinkage())
OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
else
- assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+ assert(GA.hasLocalLinkage() && "Invalid alias linkage");
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
if (IsFunction)
- OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
- ? MCSA_ELF_TypeIndFunction
- : MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
- emitVisibility(Name, GIS.getVisibility());
+ emitVisibility(Name, GA.getVisibility());
- const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
+ const MCExpr *Expr = lowerConstant(GA.getAliasee());
- if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+ if (MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry);
// Emit the directives as assignments aka .set:
OutStreamer->emitAssignment(Name, Expr);
- MCSymbol *LocalAlias = getSymbolPreferLocal(GIS);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GA);
if (LocalAlias != Name)
OutStreamer->emitAssignment(LocalAlias, Expr);
- if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
- // If the aliasee does not correspond to a symbol in the output, i.e. the
- // alias is not of an object or the aliased object is private, then set the
- // size of the alias symbol from the type of the alias. We don't do this in
- // other situations as the alias and aliasee having differing types but same
- // size may be intentional.
- const GlobalObject *BaseObject = GA->getBaseObject();
- if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() &&
- (!BaseObject || BaseObject->hasPrivateLinkage())) {
- const DataLayout &DL = M.getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
- OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
- }
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = GA.getAliaseeObject();
+ if (MAI->hasDotTypeDotSizeDirective() && GA.getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GA.getValueType());
+ OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
}
}
+void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
+ assert(!TM.getTargetTriple().isOSBinFormatXCOFF() &&
+ "IFunc is not supported on AIX.");
+
+ MCSymbol *Name = getSymbol(&GI);
+
+ if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ emitVisibility(Name, GI.getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ const MCExpr *Expr = lowerConstant(GI.getResolver());
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
+}
+
void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
if (!RS.needsSection())
return;
@@ -1815,6 +1826,11 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
+ // This needs to happen before emitting debug information since that can end
+ // arbitrary sections.
+ if (auto *TS = OutStreamer->getTargetStreamer())
+ TS->emitConstantPools();
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1857,11 +1873,11 @@ bool AsmPrinter::doFinalization(Module &M) {
AliasStack.push_back(Cur);
}
for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
- emitGlobalIndirectSymbol(M, *AncestorAlias);
+ emitGlobalAlias(M, *AncestorAlias);
AliasStack.clear();
}
for (const auto &IFunc : M.ifuncs())
- emitGlobalIndirectSymbol(M, IFunc);
+ emitGlobalIFunc(M, IFunc);
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -2455,9 +2471,14 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
if (Alignment == Align(1))
return; // 1-byte aligned: no need to emit alignment.
- if (getCurrentSection()->getKind().isText())
- OutStreamer->emitCodeAlignment(Alignment.value());
- else
+ if (getCurrentSection()->getKind().isText()) {
+ const MCSubtargetInfo *STI = nullptr;
+ if (this->MF)
+ STI = &getSubtargetInfo();
+ else
+ STI = TM.getMCSubtargetInfo();
+ OutStreamer->emitCodeAlignment(Alignment.value(), STI);
+ } else
OutStreamer->emitValueToAlignment(Alignment.value());
}
@@ -2513,7 +2534,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::GetElementPtr: {
// Generate a symbolic expression for the byte address
@@ -3265,21 +3286,21 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
// the references were generated.
+ const BasicBlock *BB = MBB.getBasicBlock();
if (MBB.hasAddressTaken()) {
- const BasicBlock *BB = MBB.getBasicBlock();
if (isVerbose())
OutStreamer->AddComment("Block address taken");
// MBBs can have their address taken as part of CodeGen without having
// their corresponding BB's address taken in IR
- if (BB->hasAddressTaken())
+ if (BB && BB->hasAddressTaken())
for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
OutStreamer->emitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
- if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB) {
if (BB->hasName()) {
BB->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, BB->getModule());
@@ -3538,7 +3559,7 @@ void AsmPrinter::emitXRayTable() {
// pointers. This should work for both 32-bit and 64-bit platforms.
if (FnSledIndex) {
OutStreamer->SwitchSection(FnSledIndex);
- OutStreamer->emitCodeAlignment(2 * WordSizeBytes);
+ OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
OutStreamer->SwitchSection(PrevSection);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4a93181f5439..ef1abc47701a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -30,10 +30,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -129,13 +129,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, AsmPrinter *AP,
- uint64_t LocCookie, raw_ostream &OS) {
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
// Switch to the inline assembly variant.
OS << "\t.intel_syntax\n\t";
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
+ int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
while (*LastEmitted) {
switch (*LastEmitted) {
@@ -145,8 +148,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
*LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
++LiteralEnd;
-
- OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
LastEmitted = LiteralEnd;
break;
}
@@ -164,6 +167,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
case '$':
++LastEmitted; // Consume second '$' character.
break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // Consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // This is gcc's behavior for | outside a variant.
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // This is gcc's behavior for } outside a variant.
+ else
+ CurVariant = -1;
+ break;
}
if (Done) break;
@@ -176,16 +200,15 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
- if (HasCurlyBraces && LastEmitted[0] == ':') {
+ if (HasCurlyBraces && *LastEmitted == ':') {
++LastEmitted;
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
- std::string Val(StrStart, StrEnd);
- AP->PrintSpecial(MI, OS, Val.c_str());
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
LastEmitted = StrEnd+1;
break;
}
@@ -201,7 +224,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
- if (Val >= NumOperands-1)
+ if (Val >= NumOperands - 1)
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
@@ -228,40 +251,50 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// Okay, we finally have a value number. Ask the target to print this
// operand!
- unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
- bool Error = false;
+ bool Error = false;
- // Scan to find the machine operand number for the operand.
- for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands()) break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
- }
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands())
+ break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
- Error = true;
- } else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
-
- if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
} else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ // FIXME: Shouldn't arch-independent output template handling go into
+ // PrintAsmOperand?
+ // Labels are target independent.
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
+ } else if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
- }
- if (Error) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
break;
}
@@ -274,10 +307,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
MachineModuleInfo *MMI, const MCAsmInfo *MAI,
AsmPrinter *AP, uint64_t LocCookie,
raw_ostream &OS) {
- int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = MAI->getAssemblerDialect();
+ int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
if (MAI->getEmitGNUAsmStartIndentationMarker())
OS << '\t';
@@ -291,7 +324,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
*LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
++LiteralEnd;
if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
LastEmitted = LiteralEnd;
break;
}
@@ -311,24 +344,24 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
OS << '$';
++LastEmitted; // Consume second '$' character.
break;
- case '(': // $( -> same as GCC's { character.
- ++LastEmitted; // Consume '(' character.
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
if (CurVariant != -1)
report_fatal_error("Nested variants found in inline asm string: '" +
Twine(AsmStr) + "'");
- CurVariant = 0; // We're in the first variant now.
+ CurVariant = 0; // We're in the first variant now.
break;
case '|':
- ++LastEmitted; // consume '|' character.
+ ++LastEmitted; // Consume '|' character.
if (CurVariant == -1)
- OS << '|'; // this is gcc's behavior for | outside a variant
+ OS << '|'; // This is gcc's behavior for | outside a variant.
else
- ++CurVariant; // We're in the next variant.
+ ++CurVariant; // We're in the next variant.
break;
- case ')': // $) -> same as GCC's } char.
- ++LastEmitted; // consume ')' character.
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
if (CurVariant == -1)
- OS << '}'; // this is gcc's behavior for } outside a variant
+ OS << '}'; // This is gcc's behavior for } outside a variant.
else
CurVariant = -1;
break;
@@ -351,9 +384,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
- std::string Val(StrStart, StrEnd);
- AP->PrintSpecial(MI, OS, Val.c_str());
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
LastEmitted = StrEnd+1;
break;
}
@@ -369,6 +401,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
+ if (Val >= NumOperands - 1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
char Modifier[2] = { 0, 0 };
if (HasCurlyBraces) {
@@ -390,10 +426,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++LastEmitted; // Consume '}' character.
}
- if (Val >= NumOperands-1)
- report_fatal_error("Invalid $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
-
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
@@ -403,7 +435,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// Scan to find the machine operand number for the operand.
for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands()) break;
+ if (OpNo >= MI->getNumOperands())
+ break;
unsigned OpFlags = MI->getOperand(OpNo).getImm();
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
@@ -411,12 +444,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// We may have a location metadata attached to the end of the
// instruction, and at no point should see metadata at any
// other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
+ ++OpNo; // Skip over the ID number.
// FIXME: Shouldn't arch-independent output template handling go into
// PrintAsmOperand?
@@ -429,8 +461,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
- } else if (Modifier[0] == 'l') {
- Error = true;
} else if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(
MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
@@ -506,7 +536,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might lead to undefined behaviour.
@@ -540,7 +570,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
"preserved across the asm statement, and clobbering them may "
"lead to undefined behaviour.";
MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
- LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning));
+ LocCookie, Msg, DiagnosticSeverity::DS_Warning));
MMI->getModule()->getContext().diagnose(
DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
}
@@ -560,13 +590,13 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
/// syntax used is ${:comment}. Targets can override this to add support
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
- const char *Code) const {
- if (!strcmp(Code, "private")) {
+ StringRef Code) const {
+ if (Code == "private") {
const DataLayout &DL = MF->getDataLayout();
OS << DL.getPrivateGlobalPrefix();
- } else if (!strcmp(Code, "comment")) {
+ } else if (Code == "comment") {
OS << MAI->getCommentString();
- } else if (!strcmp(Code, "uid")) {
+ } else if (Code == "uid") {
// Comparing the address of MI isn't sufficient, because machineinstrs may
// be allocated to the same address across functions.
@@ -582,7 +612,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
raw_string_ostream Msg(msg);
Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
}
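
Aside: the hunks above give EmitMSInlineAsmStr the same `$( ... $| ... $)` dialect-variant handling that EmitGCCInlineAsmStr already performs, so only the text of the selected assembler variant reaches the output. The following standalone sketch (not the LLVM code itself; it ignores `$$` escapes and operand references) shows that variant-selection scan in isolation:

```cpp
// Simplified variant-selection scan over an inline asm template string.
#include <cassert>
#include <string>

static std::string selectVariant(const std::string &Asm, int WantedVariant) {
  std::string Out;
  int CurVariant = -1; // -1 means "not inside a $( ... $) region".
  for (size_t I = 0; I < Asm.size(); ++I) {
    if (Asm[I] == '$' && I + 1 < Asm.size() &&
        (Asm[I + 1] == '(' || Asm[I + 1] == '|' || Asm[I + 1] == ')')) {
      char C = Asm[++I];       // consume the character after '$'
      if (C == '(')
        CurVariant = 0;        // entering the first variant
      else if (C == '|')
        ++CurVariant;          // moving to the next variant
      else
        CurVariant = -1;       // leaving the variant region
      continue;
    }
    // Copy text that is outside any variant, or inside the wanted one.
    if (CurVariant == -1 || CurVariant == WantedVariant)
      Out += Asm[I];
  }
  return Out;
}

int main() {
  // Variant 0 could carry AT&T spelling and variant 1 Intel spelling.
  assert(selectVariant("mov $(%eax$|eax$), x", 0) == "mov %eax, x");
  assert(selectVariant("mov $(%eax$|eax$), x", 1) == "mov eax, x");
  return 0;
}
```
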
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index bbb0504550c3..85ff84484ced 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -341,7 +341,16 @@ std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) {
TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
// No scope means global scope and that uses the zero index.
- if (!Scope || isa<DIFile>(Scope))
+ //
+ // We also use zero index when the scope is a DISubprogram
+ // to suppress the emission of LF_STRING_ID for the function,
+ // which can trigger a link-time error with the linker in
+ // VS2019 version 16.11.2 or newer.
+ // Note, however, skipping the debug info emission for the DISubprogram
+ // is a temporary fix. The root issue here is that we need to figure out
+ // the proper way to encode a function nested in another function
+ // (as introduced by the Fortran 'contains' keyword) in CodeView.
+ if (!Scope || isa<DIFile>(Scope) || isa<DISubprogram>(Scope))
return TypeIndex();
assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
@@ -561,6 +570,44 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+ switch (DWLang) {
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::C;
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ return SourceLanguage::Cpp;
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ return SourceLanguage::Fortran;
+ case dwarf::DW_LANG_Pascal83:
+ return SourceLanguage::Pascal;
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ return SourceLanguage::Cobol;
+ case dwarf::DW_LANG_Java:
+ return SourceLanguage::Java;
+ case dwarf::DW_LANG_D:
+ return SourceLanguage::D;
+ case dwarf::DW_LANG_Swift:
+ return SourceLanguage::Swift;
+ default:
+ // There's no CodeView representation for this language, and CV doesn't
+ // have an "unknown" option for the language field, so we'll use MASM,
+ // as it's very low level.
+ return SourceLanguage::Masm;
+ }
+}
+
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
@@ -574,6 +621,13 @@ void CodeViewDebug::beginModule(Module *M) {
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
+ // Get the current source language.
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
+
collectGlobalVariableInfo();
// Check if we should emit type record hashes.
@@ -731,43 +785,6 @@ void CodeViewDebug::emitTypeGlobalHashes() {
}
}
-static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
- switch (DWLang) {
- case dwarf::DW_LANG_C:
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
- case dwarf::DW_LANG_C11:
- case dwarf::DW_LANG_ObjC:
- return SourceLanguage::C;
- case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_C_plus_plus_03:
- case dwarf::DW_LANG_C_plus_plus_11:
- case dwarf::DW_LANG_C_plus_plus_14:
- return SourceLanguage::Cpp;
- case dwarf::DW_LANG_Fortran77:
- case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran03:
- case dwarf::DW_LANG_Fortran08:
- return SourceLanguage::Fortran;
- case dwarf::DW_LANG_Pascal83:
- return SourceLanguage::Pascal;
- case dwarf::DW_LANG_Cobol74:
- case dwarf::DW_LANG_Cobol85:
- return SourceLanguage::Cobol;
- case dwarf::DW_LANG_Java:
- return SourceLanguage::Java;
- case dwarf::DW_LANG_D:
- return SourceLanguage::D;
- case dwarf::DW_LANG_Swift:
- return SourceLanguage::Swift;
- default:
- // There's no CodeView representation for this language, and CV doesn't
- // have an "unknown" option for the language field, so we'll use MASM,
- // as it's very low level.
- return SourceLanguage::Masm;
- }
-}
-
namespace {
struct Version {
int Part[4];
@@ -797,12 +814,8 @@ void CodeViewDebug::emitCompilerInformation() {
MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
uint32_t Flags = 0;
- NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
- const MDNode *Node = *CUs->operands().begin();
- const auto *CU = cast<DICompileUnit>(Node);
-
// The low byte of the flags indicates the source language.
- Flags = MapDWLangToCVLang(CU->getSourceLanguage());
+ Flags = CurrentSourceLanguage;
// TODO: Figure out which other flags need to be set.
if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
@@ -814,6 +827,10 @@ void CodeViewDebug::emitCompilerInformation() {
OS.AddComment("CPUType");
OS.emitInt16(static_cast<uint64_t>(TheCPU));
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
@@ -1573,6 +1590,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypeClass(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_string_type:
+ return lowerTypeString(cast<DIStringType>(Ty));
case dwarf::DW_TAG_unspecified_type:
if (Ty->getName() == "decltype(nullptr)")
return TypeIndex::NullptrT();
@@ -1617,14 +1636,19 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
const DISubrange *Subrange = cast<DISubrange>(Element);
int64_t Count = -1;
- // Calculate the count if either LowerBound is absent or is zero and
- // either of Count or UpperBound are constant.
- auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
- if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) {
- if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
- Count = CI->getSExtValue();
- else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>())
- Count = UI->getSExtValue() + 1; // LowerBound is zero
+
+ // If Subrange has a Count field, use it.
+ // Otherwise, if it has an upperboud, use (upperbound - lowerbound + 1),
+ // where lowerbound is from the LowerBound field of the Subrange,
+ // or the language default lowerbound if that field is unspecified.
+ if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>())
+ Count = CI->getSExtValue();
+ else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) {
+ // Fortran uses 1 as the default lowerbound; other languages use 0.
+ int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
+ auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
+ Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
+ Count = UI->getSExtValue() - Lowerbound + 1;
}
// Forward declarations of arrays without a size and VLAs use a count of -1.
@@ -1650,6 +1674,26 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
return ElementTypeIndex;
}
+// This function lowers a Fortran character type (DIStringType).
+// Note that it handles only the character*n variant (using SizeInBits
+// field in DIString to describe the type size) at the moment.
+// Other variants (leveraging the StringLength and StringLengthExp
+// fields in DIStringType) remain TBD.
+TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) {
+ TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter);
+ uint64_t ArraySize = Ty->getSizeInBits() >> 3;
+ StringRef Name = Ty->getName();
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = getPointerSizeInBytes() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ // Create a type of character array of ArraySize.
+ ArrayRecord AR(CharType, IndexType, ArraySize, Name);
+
+ return TypeTable.writeLeafType(AR);
+}
+
TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
TypeIndex Index;
dwarf::TypeKind Kind;
@@ -1728,9 +1772,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
}
// Apply some fixups based on the source-level type name.
- if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+ // Include some amount of canonicalization from an old naming scheme Clang
+ // used to use for integer types (in an outdated effort to be compatible with
+ // GCC's debug info/GDB's behavior, which has since been addressed).
+ if (STK == SimpleTypeKind::Int32 &&
+ (Ty->getName() == "long int" || Ty->getName() == "long"))
STK = SimpleTypeKind::Int32Long;
- if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+ if (STK == SimpleTypeKind::UInt32 && (Ty->getName() == "long unsigned int" ||
+ Ty->getName() == "unsigned long"))
STK = SimpleTypeKind::UInt32Long;
if (STK == SimpleTypeKind::UInt16Short &&
(Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
@@ -2177,6 +2226,7 @@ void CodeViewDebug::clear() {
TypeIndices.clear();
CompleteTypeIndices.clear();
ScopeGlobals.clear();
+ CVGlobalVariableOffsets.clear();
}
void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -3062,6 +3112,15 @@ void CodeViewDebug::collectGlobalVariableInfo() {
const DIGlobalVariable *DIGV = GVE->getVariable();
const DIExpression *DIE = GVE->getExpression();
+ if ((DIE->getNumElements() == 2) &&
+ (DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
+ // Record the constant offset for the variable.
+ //
+ // A Fortran common block uses this idiom to encode the offset
+ // of a variable from the common block's starting address.
+ CVGlobalVariableOffsets.insert(
+ std::make_pair(DIGV, DIE->getElement(1)));
+
// Emit constant global variables in a global symbol section.
if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
CVGlobalVariable CVGV = {DIGV, DIE};
@@ -3226,7 +3285,11 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
DIGV->getRawStaticDataMemberDeclaration()))
Scope = MemberDecl->getScope();
- std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName());
+ // For Fortran, the scoping portion is elided in its name so that we can
+ // reference the variable in the command line of the VS debugger.
+ std::string QualifiedName =
+ (moduleIsInFortran()) ? std::string(DIGV->getName())
+ : getFullyQualifiedName(Scope, DIGV->getName());
if (const GlobalVariable *GV =
CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
@@ -3242,7 +3305,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
OS.AddComment("Type");
OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex());
OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+
+ uint64_t Offset = 0;
+ if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
+ // Use the offset seen while collecting info on globals.
+ Offset = CVGlobalVariableOffsets[DIGV];
+ OS.EmitCOFFSecRel32(GVSym, Offset);
+
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index d133474ee5aa..6f88e15ee8fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -186,6 +186,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
FunctionInfo *CurFn = nullptr;
+ codeview::SourceLanguage CurrentSourceLanguage =
+ codeview::SourceLanguage::Masm;
+
+ // This map records the constant offset in DIExpression of the
+ // DIGlobalVariableExpression referencing the DIGlobalVariable.
+ DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets;
+
// Map used to seperate variables according to the lexical scope they belong
// in. This is populated by recordLocalVariable() before
// collectLexicalBlocks() separates the variables between the FunctionInfo
@@ -400,6 +407,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeString(const DIStringType *Ty);
codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
codeview::TypeIndex lowerTypePointer(
const DIDerivedType *Ty,
@@ -464,6 +472,11 @@ protected:
/// Gather post-function debug information.
void endFunctionImpl(const MachineFunction *) override;
+ /// Check if the current module is in Fortran.
+ bool moduleIsInFortran() {
+ return CurrentSourceLanguage == codeview::SourceLanguage::Fortran;
+ }
+
public:
CodeViewDebug(AsmPrinter *AP);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 802f0e880514..5f4ee747fcca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -93,19 +93,15 @@ void DIEHash::addParentContext(const DIE &Parent) {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
- E = Parents.rend();
- I != E; ++I) {
- const DIE &Die = **I;
-
+ for (const DIE *Die : llvm::reverse(Parents)) {
// ... Append the letter "C" to the sequence...
addULEB128('C');
// ... Followed by the DWARF tag of the construct...
- addULEB128(Die.getTag());
+ addULEB128(Die->getTag());
// ... Then the name, taken from the DW_AT_name attribute.
- StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+ StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name);
LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
if (!Name.empty())
addString(Name);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index bb24f1414ef1..dd795079ac1a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -252,8 +252,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// Now actually remove the entries. Iterate backwards so that our remaining
// ToRemove indices are valid after each erase.
- for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
- HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
+ for (EntryIndex Idx : llvm::reverse(ToRemove))
+ HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
}
}
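
Aside: the hunk above swaps a manual rbegin/rend loop for llvm::reverse, but the underlying pattern is unchanged: erase entries by index in reverse order so the indices still to be processed remain valid. A standalone illustration with std::vector:

```cpp
// Erase elements by index, walking the index list backwards.
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> Entries = {10, 20, 30, 40, 50};
  std::vector<size_t> ToRemove = {1, 3}; // indices collected in ascending order
  // Erasing index 3 first leaves index 1 still pointing at the same element.
  for (auto It = ToRemove.rbegin(); It != ToRemove.rend(); ++It)
    Entries.erase(Entries.begin() + *It);
  assert((Entries == std::vector<int>{10, 30, 50}));
  return 0;
}
```
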
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index c81288c0e460..4df34d2c9402 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -174,21 +174,26 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
}
bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
- // SROA may generate dbg value intrinsics to assign an unsigned value to a
- // Fortran CHARACTER(1) type variables. Make them as unsigned.
if (isa<DIStringType>(Ty)) {
- assert((Ty->getSizeInBits()) == 8 && "Not a valid unsigned type!");
+ // Some transformations (e.g. instcombine) may decide to turn a Fortran
+ // character object into an integer, and later ones (e.g. SROA) may
+ // further inject a constant integer in a llvm.dbg.value call to track
+ // the object's value. Here we trust the transformations are doing the
+ // right thing, and treat the constant as unsigned to preserve that value
+ // (i.e. avoid sign extension).
return true;
}
- if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
- return false;
- // (Pieces of) aggregate types that get hacked apart by SROA may be
- // represented by a constant. Encode them as unsigned bytes.
- return true;
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (!(Ty = CTy->getBaseType()))
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ return false;
+ } else
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
}
if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 62ebadaf3cbe..d7ab2091967f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -158,7 +158,7 @@ public:
friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
- for (DbgValueLocEntry DV : ValueLocEntries)
+ for (const DbgValueLocEntry &DV : ValueLocEntries)
DV.dump();
if (Expression)
Expression->dump();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index a3bf4be09fbe..a36d2966d44a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -143,8 +143,6 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
auto *GVContext = GV->getScope();
const DIType *GTy = GV->getType();
- // Construct the context before querying for the existence of the DIE in
- // case such construction creates the DIE.
auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
: getOrCreateContextDIE(GVContext);
@@ -183,6 +181,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+ addAnnotation(*VariableDIE, GV->getAnnotations());
+
if (uint32_t AlignInBytes = GV->getAlignInBytes())
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
@@ -260,14 +260,14 @@ void DwarfCompileUnit::addLocationAttribute(
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
if (Global->isThreadLocal()) {
if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a constNu of the appropriate pointer size
@@ -290,6 +290,24 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
+ } else if (Asm->TM.getRelocationModel() == Reloc::RWPI ||
+ Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) {
+ // Constant
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // Relocation offset
+ addExpr(*Loc, PointerSize == 4 ? dwarf::DW_FORM_data4
+ : dwarf::DW_FORM_data8,
+ Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym));
+ // Base register
+ Register BaseReg = Asm->getObjFileLowering().getStaticBase();
+ BaseReg = Asm->TM.getMCRegisterInfo()->getDwarfRegNum(BaseReg, false);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + BaseReg);
+ // Offset from base register
+ addSInt(*Loc, dwarf::DW_FORM_sdata, 0);
+ // Operation
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
@@ -331,12 +349,10 @@ void DwarfCompileUnit::addLocationAttribute(
DIE *DwarfCompileUnit::getOrCreateCommonBlock(
const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
- // Construct the context before querying for the existence of the DIE in case
- // such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
-
+ // Check for pre-existence.
if (DIE *NDie = getDIE(CB))
return NDie;
+ DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
addString(NDie, dwarf::DW_AT_name, Name);
@@ -351,7 +367,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock(
void DwarfCompileUnit::addRange(RangeSpan Range) {
DD->insertSectionLabel(Range.Begin);
- bool SameAsPrevCU = this == DD->getPrevCU();
+ auto *PrevCU = DD->getPrevCU();
+ bool SameAsPrevCU = this == PrevCU;
DD->setPrevCU(this);
// If we have no current ranges just add the range and return, otherwise,
// check the current section and CU against the previous section and CU we
@@ -360,6 +377,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
if (CURanges.empty() || !SameAsPrevCU ||
(&CURanges.back().End->getSection() !=
&Range.End->getSection())) {
+ // Before a new range is added, always terminate the prior line table.
+ if (PrevCU)
+ DD->terminateLineTable(PrevCU);
CURanges.push_back(Range);
return;
}
@@ -470,7 +490,6 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
if (!isDwoUnit()) {
addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
- DD->addArangeLabel(SymbolCU(this, SPSym));
} else {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
@@ -970,9 +989,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
bool visitedAllDependencies = Item.getInt();
WorkList.pop_back();
- // Dependency is in a different lexical scope or a global.
- if (!Var)
- continue;
+ assert(Var);
// Already handled.
if (Visited.count(Var))
@@ -996,8 +1013,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
// visited again after all of its dependencies are handled.
WorkList.push_back({Var, 1});
for (auto *Dependency : dependencies(Var)) {
- auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency);
- WorkList.push_back({DbgVar[Dep], 0});
+ // Don't add dependency if it is in a different lexical scope or a global.
+ if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency))
+ if (DbgVariable *Var = DbgVar.lookup(Dep))
+ WorkList.push_back({Var, 0});
}
}
return Result;
@@ -1112,9 +1131,10 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
// shouldn't be found by lookup.
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
-
- if (!ContextCU->includeMinimalInlineScopes())
- ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline,
+ DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>()
+ : dwarf::DW_FORM_implicit_const,
+ dwarf::DW_INL_inlined);
if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
@@ -1275,6 +1295,16 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
+ // This is for imported module with renamed entities (such as variables and
+ // subprograms).
+ DINodeArray Elements = Module->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ IMDie->addChild(
+ constructImportedEntityDIE(cast<DIImportedEntity>(Element)));
+ }
+
return IMDie;
}
@@ -1489,10 +1519,12 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
const auto *DIVar = Var.getVariable();
- if (DIVar)
+ if (DIVar) {
if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
+ addAnnotation(VariableDie, DIVar->getAnnotations());
+ }
addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 52591a18791f..047676d4c11e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -480,7 +480,7 @@ static bool hasObjCCategory(StringRef Name) {
if (!isObjCClass(Name))
return false;
- return Name.find(") ") != StringRef::npos;
+ return Name.contains(") ");
}
static void getObjCClassCategory(StringRef In, StringRef &Class,
@@ -1101,11 +1101,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
- // Create DIEs for function declarations used for call site debug info.
- for (auto Scope : DIUnit->getRetainedTypes())
- if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
- NewCU.getOrCreateSubprogramDIE(SP);
-
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
@@ -1412,6 +1407,10 @@ void DwarfDebug::finalizeModuleInfo() {
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
+ PrevCU = nullptr;
assert(CurFn == nullptr);
assert(CurMI == nullptr);
@@ -2087,12 +2086,22 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
- for (const auto &MBB : *MF)
- for (const auto &MI : MBB)
+ DebugLoc LineZeroLoc;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc())
- return MI.getDebugLoc();
- return DebugLoc();
+ MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The prologue_end
+ // marks the first breakpoint in the function after the frame setup, and
+ // a compiler-generated line 0 location is not a meaningful breakpoint.
+ // If none is found, return the first location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return MI.getDebugLoc();
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ }
+ }
+ return LineZeroLoc;
}
/// Register a source line with debug info. Returns the unique label that was
@@ -2147,24 +2156,42 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(
+ getDwarfCompileUnitIDForLineTable(CU));
+
+ // Record beginning of function.
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+}
+
+unsigned
+DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+ return 0;
else
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
+ return CU.getUniqueID();
+}
- // Record beginning of function.
- PrologEndLoc = emitInitialLocDirective(
- *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
+ const auto &CURanges = CU->getRanges();
+ auto &LineTable = Asm->OutStreamer->getContext().getMCDwarfLineTable(
+ getDwarfCompileUnitIDForLineTable(*CU));
+ // Add the last range label for the given CU.
+ LineTable.getMCLineSections().addEndEntry(
+ const_cast<MCSymbol *>(CURanges.back().End));
}
void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// section to nullptr.
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
PrevCU = nullptr;
CurFn = nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index b55be799b6bc..4e1a1b1e068d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -65,19 +65,21 @@ class Module;
/// such that it could leverage polymorphism to extract common code for
/// DbgVariable and DbgLabel.
class DbgEntity {
- const DINode *Entity;
- const DILocation *InlinedAt;
- DIE *TheDIE = nullptr;
- unsigned SubclassID;
-
public:
enum DbgEntityKind {
DbgVariableKind,
DbgLabelKind
};
- DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
- : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+private:
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ const DbgEntityKind SubclassID;
+
+public:
+ DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
virtual ~DbgEntity() {}
/// Accessors.
@@ -85,19 +87,18 @@ public:
const DINode *getEntity() const { return Entity; }
const DILocation *getInlinedAt() const { return InlinedAt; }
DIE *getDIE() const { return TheDIE; }
- unsigned getDbgEntityID() const { return SubclassID; }
+ DbgEntityKind getDbgEntityID() const { return SubclassID; }
/// @}
void setDIE(DIE &D) { TheDIE = &D; }
static bool classof(const DbgEntity *N) {
switch (N->getDbgEntityID()) {
- default:
- return false;
case DbgVariableKind:
case DbgLabelKind:
return true;
}
+ llvm_unreachable("Invalid DbgEntityKind");
}
};
@@ -612,7 +613,7 @@ private:
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
- /// function that describe the same variable. If the resulting
+ /// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
@@ -632,6 +633,9 @@ protected:
/// Gather and emit post-function debug information.
void endFunctionImpl(const MachineFunction *MF) override;
+ /// Get Dwarf compile unit ID for line table.
+ unsigned getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU);
+
void skippedNonDebugFunction() override;
public:
@@ -778,6 +782,9 @@ public:
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
+ /// Terminate the line table by adding the last range label.
+ void terminateLineTable(const DwarfCompileUnit *CU);
+
/// Returns the entries for the .debug_loc section.
const DebugLocStream &getDebugLocs() const { return DebugLocs; }
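
The DbgEntity change above stores the subclass tag as the enum type itself and makes classof's switch exhaustive. A hedged, self-contained sketch of that pattern (names invented for illustration); with no default case, adding a new enumerator triggers a -Wswitch warning at every switch that forgets to handle it:

#include "llvm/Support/ErrorHandling.h"

class Entity {
public:
  enum EntityKind { VariableKind, LabelKind };
  explicit Entity(EntityKind K) : SubclassID(K) {}
  EntityKind getKind() const { return SubclassID; }

  static bool classof(const Entity *E) {
    switch (E->getKind()) {
    case VariableKind:
    case LabelKind:
      return true; // every known kind is acceptable
    }
    llvm_unreachable("Invalid EntityKind");
  }

private:
  const EntityKind SubclassID; // typed, not a plain unsigned
};
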
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 9d7b3d6e1891..976e35905144 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -672,7 +672,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
+ for (const DIScope *Ctx : llvm::reverse(Parents)) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
@@ -754,6 +754,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ addAnnotation(Buffer, DTy->getAnnotations());
+
// If alignment is specified for a typedef , create and insert DW_AT_alignment
// attribute in DW_TAG_typedef DIE.
if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
@@ -833,6 +835,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
}
+void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) {
+ if (!Annotations)
+ return;
+
+ for (const Metadata *Annotation : Annotations->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotation);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+
+ // Currently, only MDString is supported with btf_decl_tag attribute.
+ const MDString *Value = cast<MDString>(MD->getOperand(1));
+
+ DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer);
+ addString(AnnotationDie, dwarf::DW_AT_name, Name->getString());
+ addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString());
+ }
+}
+
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -850,7 +869,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
case dwarf::DW_TAG_variant_part:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
- case dwarf::DW_TAG_class_type: {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_namelist: {
// Emit the discriminator for a variant part.
DIDerivedType *Discriminator = nullptr;
if (Tag == dwarf::DW_TAG_variant_part) {
@@ -919,6 +939,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
constructTypeDIE(VariantPart, Composite);
}
+ } else if (Tag == dwarf::DW_TAG_namelist) {
+ auto *Var = dyn_cast<DINode>(Element);
+ auto *VarDIE = getDIE(Var);
+ if (VarDIE) {
+ DIE &ItemDie = createAndAddDIE(dwarf::DW_TAG_namelist_item, Buffer);
+ addDIEEntry(ItemDie, dwarf::DW_AT_namelist_item, *VarDIE);
+ }
}
}
@@ -961,6 +988,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ addAnnotation(Buffer, CTy->getAnnotations());
+
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
@@ -1197,6 +1226,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
+ addAnnotation(SPDie, SP->getAnnotations());
+
if (!SkipSPSourceLocation)
addSourceLine(SPDie, SP);
@@ -1547,6 +1578,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
+ addAnnotation(MemberDie, DT->getAnnotations());
+
if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
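
The addAnnotation() calls introduced above emit DW_TAG_LLVM_annotation children for IR-level annotations. To my knowledge these currently come from Clang's btf_decl_tag attribute; a hedged source-level example (field and variable names are made up) of declarations that would carry such annotations:

// Each btf_decl_tag string should surface as a DW_TAG_LLVM_annotation child
// with DW_AT_name "btf_decl_tag" and DW_AT_const_value set to the string.
struct packet {
  void *payload __attribute__((btf_decl_tag("user_ptr")));
};
int hit_counter __attribute__((btf_decl_tag("percpu")));
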
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 4d31dd0daf59..8140279adaef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -294,6 +294,9 @@ public:
void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec);
+ /// Add DW_TAG_LLVM_annotation.
+ void addAnnotation(DIE &Buffer, DINodeArray Annotations);
+
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e589c2e64abd..150f19324834 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -812,8 +812,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
Entry = TypeInfos.size();
}
- for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
- TypeInfos.rend())) {
+ for (const GlobalValue *GV : llvm::reverse(TypeInfos)) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->emitTTypeReference(GV, TTypeEncoding);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 35a830f416f6..9e6f1a537de3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
+PseudoProbeHandler::~PseudoProbeHandler() = default;
+
void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
uint64_t Type, uint64_t Attr,
const DILocation *DebugLoc) {
@@ -35,7 +37,10 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
auto Name = SP->getLinkageName();
if (Name.empty())
Name = SP->getName();
- uint64_t CallerGuid = Function::getGUID(Name);
+ // Use caching to avoid redundant md5 computation for build speed.
+ uint64_t &CallerGuid = NameGuidMap[Name];
+ if (!CallerGuid)
+ CallerGuid = Function::getGUID(Name);
uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
InlinedAt->getDiscriminator());
ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
index f2026a118bf5..7d5e51218693 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -26,9 +26,12 @@ class DILocation;
class PseudoProbeHandler : public AsmPrinterHandler {
// Target of pseudo probe emission.
AsmPrinter *Asm;
+ // Name to GUID map, used as caching/memoization for speed.
+ DenseMap<StringRef, uint64_t> NameGuidMap;
public:
PseudoProbeHandler(AsmPrinter *A) : Asm(A){};
+ ~PseudoProbeHandler() override;
void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attr, const DILocation *DebugLoc);
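
The NameGuidMap addition above memoizes Function::getGUID(), which hashes the name with MD5. A standalone sketch of the same caching pattern (the helper name is mine): DenseMap::operator[] value-initializes a missing entry to zero, so a zero GUID doubles as "not computed yet", just as in the patch.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"

static uint64_t getCachedGUID(llvm::StringRef Name,
                              llvm::DenseMap<llvm::StringRef, uint64_t> &Cache) {
  uint64_t &Guid = Cache[Name]; // inserts 0 on first lookup
  if (!Guid)
    Guid = llvm::Function::getGUID(Name); // MD5-based, computed at most once per name
  return Guid;
}
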
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index 352a33e8639d..a17a2ca2790e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -18,16 +18,25 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'catch' instruction to denote this
- // is a C++ exception. This symbol has to be emitted somewhere once in the
- // module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'catch' instruction in the module, and emit the symbol
- // only if so.
- SmallString<60> NameStr;
- Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
- if (Asm->OutContext.lookupSymbol(NameStr)) {
- MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
- Asm->OutStreamer->emitLabel(ExceptionSym);
+ // These are symbols used to throw/catch C++ exceptions and C longjmps. These
+ // symbols have to be emitted somewhere once in the module. Check if each of
+ // the symbols has already been created, i.e., we have at least one 'throw' or
+ // 'catch' instruction with the symbol in the module, and emit the symbol only
+ // if so.
+ //
+ // But in dynamic linking, it is in general not possible to come up with a
+ // module instantiating order in which tag-defining modules are loaded before
+ // the importing modules. So we make them undefined symbols here, define tags
+ // in the JS side, and feed them to each importing module.
+ if (!Asm->isPositionIndependent()) {
+ for (const char *SymName : {"__cpp_exception", "__c_longjmp"}) {
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, SymName, Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol(SymName);
+ Asm->OutStreamer->emitLabel(ExceptionSym);
+ }
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index b30d9cc12abc..ef57031c7294 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -43,6 +43,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
// platforms use an imagerel32 relocation to refer to symbols.
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
isAArch64 = Asm->TM.getTargetTriple().isAArch64();
+ isThumb = Asm->TM.getTargetTriple().isThumb();
}
WinException::~WinException() {}
@@ -330,10 +331,12 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
}
const MCExpr *WinException::getLabel(const MCSymbol *Label) {
- if (isAArch64)
- return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
- Asm->OutContext);
- return MCBinaryExpr::createAdd(create32bitRef(Label),
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(getLabel(Label),
MCConstantExpr::create(1, Asm->OutContext),
Asm->OutContext);
}
@@ -561,8 +564,8 @@ InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
/// struct Table {
/// int NumEntries;
/// struct Entry {
-/// imagerel32 LabelStart;
-/// imagerel32 LabelEnd;
+/// imagerel32 LabelStart; // Inclusive
+/// imagerel32 LabelEnd; // Exclusive
/// imagerel32 FilterOrFinally; // One means catch-all.
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
@@ -664,7 +667,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
AddComment("LabelStart");
OS.emitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.emitValue(getLabel(EndLabel), 4);
+ OS.emitValue(getLabelPlusOne(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
OS.emitValue(FilterOrFinally, 4);
@@ -949,8 +952,15 @@ void WinException::computeIP2StateTable(
if (!ChangeLabel)
ChangeLabel = StateChange.PreviousEndLabel;
// Emit an entry indicating that PCs after 'Label' have this EH state.
+ // NOTE: On ARM architectures, the StateFromIp automatically takes into
+ // account that the return address is after the call instruction (whose EH
+ // state we should be using), but on other platforms we need to add 1 to the
+ // label so that we are using the correct EH state.
+ const MCExpr *LabelExpression = (isAArch64 || isThumb)
+ ? getLabel(ChangeLabel)
+ : getLabelPlusOne(ChangeLabel);
IPToStateTable.push_back(
- std::make_pair(getLabel(ChangeLabel), StateChange.NewState));
+ std::make_pair(LabelExpression, StateChange.NewState));
// FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
index feea05ba63ad..638589adf0dd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -39,6 +39,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if we are generating exception handling on Windows for ARM64.
bool isAArch64 = false;
+ /// True if we are generating exception handling on Windows for ARM (Thumb).
+ bool isThumb = false;
+
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
@@ -77,6 +80,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
const MCExpr *getLabel(const MCSymbol *Label);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
const MCSymbol *OffsetFrom);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 125a3be585cb..4838f6da750d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -179,11 +180,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
// Changing control-flow while iterating through it is a bad idea, so gather a
// list of all atomic instructions before we start.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
- if (I->isAtomic() && !isa<FenceInst>(I))
- AtomicInsts.push_back(I);
- }
+ for (Instruction &I : instructions(F))
+ if (I.isAtomic() && !isa<FenceInst>(&I))
+ AtomicInsts.push_back(&I);
bool MadeChange = false;
for (auto I : AtomicInsts) {
@@ -570,7 +569,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ LLVMContext &Ctx = AI->getModule()->getContext();
+ TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+ switch (Kind) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
@@ -600,6 +601,18 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[AI->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[AI->getSyncScopeID()];
+ OptimizationRemarkEmitter ORE(AI->getFunction());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
+ << "A compare and swap loop was generated for an atomic "
+ << AI->getOperationName(AI->getOperation()) << " operation at "
+ << MemScope << " memory scope";
+ });
expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}
return true;
@@ -1850,7 +1863,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// Now, the return type.
if (CASExpected) {
ResultTy = Type::getInt1Ty(Ctx);
- Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
+ Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
} else if (HasResult && UseSizedLibcall)
ResultTy = SizedIntTy;
else
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index 1a6eed272ca2..c1901bc46d72 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -21,9 +21,21 @@
// clusters of basic blocks. Every cluster will be emitted into a separate
// section with its basic blocks sequenced in the given order. To get the
// optimized performance, the clusters must form an optimal BB layout for the
-// function. Every cluster's section is labeled with a symbol to allow the
-// linker to reorder the sections in any arbitrary sequence. A global order of
-// these sections would encapsulate the function layout.
+// function. We insert a symbol at the beginning of every cluster's section to
+// allow the linker to reorder the sections in any arbitrary sequence. A global
+// order of these sections would encapsulate the function layout.
+// For example, consider the following clusters for a function foo (consisting
+// of 6 basic blocks 0, 1, ..., 5).
+//
+// 0 2
+// 1 3 5
+//
+// * Basic blocks 0 and 2 are placed in one section with symbol `foo`
+// referencing the beginning of this section.
+// * Basic blocks 1, 3, 5 are placed in a separate section. A new symbol
+// `foo.__part.1` will reference the beginning of this section.
+// * Basic block 4 (note that it is not referenced in the list) is placed in
+// one section, and a new symbol `foo.cold` will point to it.
//
// There are a couple of challenges to be addressed:
//
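
For the example in the comment above, the cluster list fed to the pass would look roughly like the following, assuming I recall the -basic-block-sections=list profile syntax correctly ('!' introduces a function, '!!' a cluster of basic block ids); block 4 is absent from the list, so it lands in the cold section:

!foo
!!0 2
!!1 3 5
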
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 65e7e92fe152..5ac8f49a9522 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -611,7 +611,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// there are fallthroughs, and we don't know until after layout.
if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
- if (MBB->succ_size() != 0 && !MBB->canFallThrough())
+ if (!MBB->succ_empty() && !MBB->canFallThrough())
return false;
MachineFunction::iterator I(MBB);
MachineFunction *MF = MBB->getParent();
@@ -1198,14 +1198,13 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Renumbering blocks alters EH scope membership, recalculate it.
EHScopeMembership = getEHScopeMembership(MF);
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ) {
- MachineBasicBlock *MBB = &*I++;
- MadeChange |= OptimizeBlock(MBB);
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(MF))) {
+ MadeChange |= OptimizeBlock(&MBB);
// If it is dead, remove it.
- if (MBB->pred_empty()) {
- RemoveDeadBlock(MBB);
+ if (MBB.pred_empty()) {
+ RemoveDeadBlock(&MBB);
MadeChange = true;
++NumDeadBlocks;
}
@@ -1753,10 +1752,8 @@ ReoptimizeBlock:
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = &*I++;
- MadeChange |= HoistCommonCodeInSuccs(MBB);
- }
+ for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF))
+ MadeChange |= HoistCommonCodeInSuccs(&MBB);
return MadeChange;
}
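
Both loops above move to llvm::make_early_inc_range, which advances the iterator before the body runs so the current element can be deleted safely. A hedged standalone sketch (simplified; the real pass goes through RemoveDeadBlock for extra bookkeeping):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"

static void eraseDeadBlocks(llvm::MachineFunction &MF) {
  for (llvm::MachineBasicBlock &MBB : llvm::make_early_inc_range(MF)) {
    if (MBB.isEntryBlock() || !MBB.pred_empty())
      continue;
    while (!MBB.succ_empty())
      MBB.removeSuccessor(MBB.succ_begin()); // keep the CFG consistent
    MBB.eraseFromParent(); // safe: the range already stepped past MBB
  }
}
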
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 366c303614d6..50825ccf9bac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
MI.eraseFromParent();
- BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
- *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+ // Create the optional restore block and, initially, place it at the end of
+ // the function. That block will be placed later if it's used; otherwise, it will
+ // be erased.
+ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+ TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+ DestOffset - SrcOffset, RS.get());
+
+ BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
adjustBlockOffsets(*MBB);
+
+ // If RestoreBB is required, try to place it just before DestBB.
+ if (!RestoreBB->empty()) {
+ // TODO: For multiple far branches to the same destination, there are
+ // chances that some restore blocks could be shared if they clobber the
+ // same registers and share the same restore sequence. So far, those
+ // restore blocks are just duplicated for each far branch.
+ assert(!DestBB->isEntryBlock());
+ MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+ if (auto *FT = PrevBB->getFallThrough()) {
+ assert(FT == DestBB);
+ TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+ // Recalculate the block size.
+ BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+ }
+ // Now, RestoreBB could be placed directly before DestBB.
+ MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+ // Update successors and predecessors.
+ RestoreBB->addSuccessor(DestBB);
+ BranchBB->replaceSuccessor(DestBB, RestoreBB);
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *RestoreBB);
+ // Compute the restore block size.
+ BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+ // Update the offset starting from the previous block.
+ adjustBlockOffsets(*PrevBB);
+ } else {
+ // Remove restore block if it's not required.
+ MF->erase(RestoreBB);
+ }
+
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index b11db3e65770..558700bd9b3b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -244,7 +244,7 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;
- for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
+ for (MachineInstr &I : llvm::reverse(*MBB)) {
// Update liveness, including the current instruction's defs.
LiveRegSet.stepBackward(I);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
new file mode 100644
index 000000000000..877aa69c3e58
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -0,0 +1,169 @@
+//===-- CodeGenCommonISel.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+using namespace llvm;
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
+/// is 0.
+MachineBasicBlock *
+StackProtectorDescriptor::addSuccessorMBB(
+ const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef()) {
+ // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ if (MI.isDebugInstr())
+ return true;
+
+ // For GlobalISel, we may have extension instructions for arguments within
+ // copy sequences. Allow these.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, selectiondag always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // Call frames cannot be nested, so if this frame is describing the tail
+ // call itself, then we must insert before the sequence even starts. For
+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail
+ // call has to register moves of its own and should be the split point. For
+ // example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
+ while (MIIsInTerminatorSequence(*Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
+}
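
A hedged usage sketch of the helper defined above (block names are illustrative): callers splice everything from the split point to the end of the parent block into the stack-protector check block, so the copies feeding the terminator travel with it.

#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

static void spliceTailIntoCheckBlock(llvm::MachineBasicBlock *ParentMBB,
                                     llvm::MachineBasicBlock *CheckMBB,
                                     const llvm::TargetInstrInfo &TII) {
  llvm::MachineBasicBlock::iterator Split =
      llvm::findSplitPointForStackProtector(ParentMBB, TII);
  CheckMBB->splice(CheckMBB->end(), ParentMBB, Split, ParentMBB->end());
}
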
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 77ce3d2fb563..ac4180c4c3ab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -530,10 +530,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
while (MadeChange) {
MadeChange = false;
DT.reset();
- for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = &*I++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -660,12 +659,8 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
return;
auto &GEPVector = VecI->second;
- const auto &I =
- llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
- if (I == GEPVector.end())
- return;
+ llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
- GEPVector.erase(I);
if (GEPVector.empty())
LargeOffsetGEPMap.erase(VecI);
}
@@ -2037,7 +2032,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Only handle legal scalar cases. Anything else requires too much work.
Type *Ty = CountZeros->getType();
- unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
+ unsigned SizeInBits = Ty->getScalarSizeInBits();
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
@@ -2108,7 +2103,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// idea
unsigned MinSize, PrefAlign;
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
// sense for GEPs if the offset is a multiple of the desired alignment and
@@ -2159,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// into their uses. TODO: generalize this to work over profiling data
if (CI->hasFnAttr(Attribute::Cold) &&
!OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
if (!Arg->getType()->isPointerTy())
continue;
unsigned AS = Arg->getType()->getPointerAddressSpace();
@@ -3718,7 +3713,8 @@ private:
// Traverse all Phis until we found equivalent or fail to do that.
bool IsMatched = false;
for (auto &P : PHI->getParent()->phis()) {
- if (&P == PHI)
+ // Skip new Phi nodes.
+ if (PhiNodesToMatch.count(&P))
continue;
if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
break;
@@ -4187,7 +4183,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
if (Inst->getOpcode() == Instruction::Xor) {
const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
// Make sure it is not a NOT.
- if (Cst && !Cst->getValue().isAllOnesValue())
+ if (Cst && !Cst->getValue().isAllOnes())
return true;
}
@@ -4858,10 +4854,9 @@ static constexpr int MaxMemoryUsesToScan = 20;
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
+/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
- Instruction *I,
- SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, int SeenInsts = 0) {
@@ -4882,31 +4877,28 @@ static bool FindAllMemoryUses(
Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
- MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+ MemoryUses.push_back({U.get(), LI->getType()});
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != StoreInst::getPointerOperandIndex())
+ if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, opNo));
+ MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
continue;
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(RMW, opNo));
+ MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
continue;
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
continue;
}
@@ -5016,7 +5008,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// we can remove the addressing mode and effectively trade one live register
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
PSI, BFI))
@@ -5032,18 +5024,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
- if (!AddrTy)
- return false;
- Type *AddressAccessTy = AddrTy->getElementType();
- unsigned AS = AddrTy->getAddressSpace();
+ for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first;
+ Type *AddressAccessTy = Pair.second;
+ unsigned AS = Address->getType()->getPointerAddressSpace();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -5124,8 +5108,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
- Value *V = worklist.back();
- worklist.pop_back();
+ Value *V = worklist.pop_back_val();
// We allow traversing cyclic Phi nodes.
// In case of success after this loop we ensure that traversing through
@@ -6477,8 +6460,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
APInt WidestAndBits(BitWidth, 0);
while (!WorkList.empty()) {
- Instruction *I = WorkList.back();
- WorkList.pop_back();
+ Instruction *I = WorkList.pop_back_val();
// Break use-def graph loops.
if (!Visited.insert(I).second)
@@ -6950,16 +6932,26 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
BasicBlock *TargetBB = I->getParent();
bool Changed = false;
SmallVector<Use *, 4> ToReplace;
+ Instruction *InsertPoint = I;
+ DenseMap<const Instruction *, unsigned long> InstOrdering;
+ unsigned long InstNumber = 0;
+ for (const auto &I : *TargetBB)
+ InstOrdering[&I] = InstNumber++;
+
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+ if (isa<PHINode>(UI))
continue;
+ if (UI->getParent() == TargetBB) {
+ if (InstOrdering[UI] < InstOrdering[InsertPoint])
+ InsertPoint = UI;
+ continue;
+ }
ToReplace.push_back(U);
}
SetVector<Instruction *> MaybeDead;
DenseMap<Instruction *, Instruction *> NewInstructions;
- Instruction *InsertPoint = I;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
@@ -7863,8 +7855,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
- if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking)
- return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
+ sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
+ return true;
// TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
@@ -8030,9 +8023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
DominatorTree DT(F);
for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- Instruction *Insn = &*BI++;
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
if (!DVI)
continue;
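
The InstOrdering map added to tryToSinkFreeOperands above is a one-pass numbering of the block so later position comparisons are O(1). A reduced sketch of the trick (the helper name is mine; it assumes both instructions live in BB):

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static bool isBeforeInBlock(const llvm::Instruction *A, const llvm::Instruction *B,
                            const llvm::BasicBlock &BB) {
  llvm::DenseMap<const llvm::Instruction *, unsigned> Order;
  unsigned N = 0;
  for (const llvm::Instruction &I : BB)
    Order[&I] = N++;
  return Order.lookup(A) < Order.lookup(B); // assumes A and B are in BB
}
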
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index f3cba6225107..a1ff02178ffa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -65,6 +65,7 @@ CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
CGOPT(FloatABI::ABIType, FloatABIForCalls)
CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
+CGOPT(SwiftAsyncFramePointerMode, SwiftAsyncFramePointer)
CGOPT(bool, DontPlaceZerosInBSS)
CGOPT(bool, EnableGuaranteedTailCallOpt)
CGOPT(bool, DisableTailCalls)
@@ -89,11 +90,11 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, PseudoProbeForProfiling)
CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
+CGOPT(unsigned, AlignLoops)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -277,6 +278,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Only fuse FP ops when the result won't be affected.")));
CGBINDOPT(FuseFPOps);
+ static cl::opt<SwiftAsyncFramePointerMode> SwiftAsyncFramePointer(
+ "swift-async-fp",
+ cl::desc("Determine when the Swift async frame pointer should be set"),
+ cl::init(SwiftAsyncFramePointerMode::Always),
+ cl::values(clEnumValN(SwiftAsyncFramePointerMode::DeploymentBased, "auto",
+ "Determine based on deployment target"),
+ clEnumValN(SwiftAsyncFramePointerMode::Always, "always",
+ "Always set the bit"),
+ clEnumValN(SwiftAsyncFramePointerMode::Never, "never",
+ "Never set the bit")));
+ CGBINDOPT(SwiftAsyncFramePointer);
+
static cl::opt<bool> DontPlaceZerosInBSS(
"nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -420,11 +433,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
- static cl::opt<bool> PseudoProbeForProfiling(
- "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
- cl::init(false));
- CGBINDOPT(PseudoProbeForProfiling);
-
static cl::opt<bool> ValueTrackingVariableLocations(
"experimental-debug-variable-locations",
cl::desc("Use experimental new value-tracking variable locations"),
@@ -452,6 +460,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
CGBINDOPT(DebugStrictDwarf);
+ static cl::opt<unsigned> AlignLoops("align-loops",
+ cl::desc("Default alignment for loops"));
+ CGBINDOPT(AlignLoops);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -522,18 +534,18 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
+ Options.LoopAlignment = getAlignLoops();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
Options.EABIVersion = getEABIVersion();
Options.DebuggerTuning = getDebuggerTuningOpt();
-
+ Options.SwiftAsyncFramePointer = getSwiftAsyncFramePointer();
return Options;
}
@@ -666,13 +678,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
if (const auto *F = Call->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::debugtrap ||
F->getIntrinsicID() == Intrinsic::trap)
- Call->addAttribute(
- AttributeList::FunctionIndex,
+ Call->addFnAttr(
Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));
// Let NewAttrs override Attrs.
- F.setAttributes(
- Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+ F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs));
}
/// Set function attributes of functions in Module M based on CPU,
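For context on the CommandFlags changes above: the patch adds a -swift-async-fp=<auto|always|never> option and an -align-loops=<N> option, drops the old -pseudo-probe-for-profiling flag, and forwards the new values into TargetOptions as SwiftAsyncFramePointer and LoopAlignment. A minimal sketch of how a driver would typically pick these up, assuming the usual llc-style setup; the main() scaffolding below is illustrative and not part of the patch:

#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetOptions.h"

// Instantiating RegisterCodeGenFlags registers -align-loops, -swift-async-fp
// and the rest of the codegen options with cl::opt.
static llvm::codegen::RegisterCodeGenFlags CGF;

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv, "codegen flags sketch\n");
  llvm::Triple TheTriple(llvm::sys::getDefaultTargetTriple());
  // With this patch, LoopAlignment and SwiftAsyncFramePointer are filled in
  // from the command line alongside the pre-existing options.
  llvm::TargetOptions Options =
      llvm::codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
  (void)Options;
  return 0;
}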
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index c56c8c87734f..981f5973fee8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -212,6 +212,21 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
+ }
+ }
+ }
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isValid())
+ continue;
// If this reg is tied and live (Classes[Reg] is set to -1), we can't change
// it or any of its sub or super regs. We need to use KeepRegs to mark the
// reg because not all uses of the same reg within an instruction are
@@ -222,7 +237,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// of a register? In the above 'xor' example, the uses of %eax are undef, so
// earlier instructions could still replace %eax even though the 'xor'
// itself can't be changed.
- if (MI.isRegTiedToUseOperand(i) &&
+ if (MI.isRegTiedToUseOperand(I) &&
Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
@@ -233,14 +248,6 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
KeepRegs.set(*SuperRegs);
}
}
-
- if (MO.isUse() && Special) {
- if (!KeepRegs.test(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- KeepRegs.set(*SubRegs);
- }
- }
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6e7db95b5c2a..c6c0b79cd7e7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -138,26 +138,22 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
- MIE = MBB->rend();
- MII != MIE;) {
- MachineInstr *MI = &*MII++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// If the instruction is dead, delete it!
- if (isDead(MI)) {
- LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ if (isDead(&MI)) {
+ LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
// It is possible that some DBG_VALUE instructions refer to this
// instruction. They get marked as undef and will be deleted
// in the live debug variable analysis.
- MI->eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
continue;
}
// Record the physreg defs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -175,8 +171,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg uses, after the defs, in case a physreg is
// both defined and used in the same instruction.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
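Several hunks in this commit (here, and in ExpandPostRAPseudos, GCRootLowering and Combiner below) replace manual iterator bumping with llvm::make_early_inc_range, which advances the wrapped iterator before the loop body runs so the current element can be erased safely. A small self-contained sketch of the idiom on a node-based standard container; the function and variable names are invented for the illustration:

#include "llvm/ADT/STLExtras.h"
#include <map>
#include <string>

// Erase entries while walking the map: the underlying iterator has already
// moved on by the time the body runs, so erasing the current entry is safe.
void pruneEmptyValues(std::map<std::string, std::string> &M) {
  for (auto &Entry : llvm::make_early_inc_range(M))
    if (Entry.second.empty())
      M.erase(Entry.first);
}

Note that the idiom is safe for node-based containers and for LLVM's intrusive instruction lists, where erasing one element leaves other iterators valid; it does not make erasure from a vector-like container safe.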
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 5ca1e91cc5f4..fb8a3e383950 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -54,13 +55,11 @@ namespace {
class DwarfEHPrepare {
CodeGenOpt::Level OptLevel;
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee &RewindFunction;
-
Function &F;
const TargetLowering &TLI;
DomTreeUpdater *DTU;
const TargetTransformInfo *TTI;
+ const Triple &TargetTriple;
/// Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
@@ -78,11 +77,11 @@ class DwarfEHPrepare {
bool InsertUnwindResumeCalls();
public:
- DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_,
- Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_,
- const TargetTransformInfo *TTI_)
- : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_),
- DTU(DTU_), TTI(TTI_) {}
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_,
+ const TargetLowering &TLI_, DomTreeUpdater *DTU_,
+ const TargetTransformInfo *TTI_, const Triple &TargetTriple_)
+ : OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_),
+ TargetTriple(TargetTriple_) {}
bool run();
};
@@ -211,13 +210,28 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
if (ResumesLeft == 0)
return true; // We pruned them all.
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- FunctionType *FTy =
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee RewindFunction;
+ CallingConv::ID RewindFunctionCallingConv;
+ FunctionType *FTy;
+ const char *RewindName;
+ bool DoesRewindFunctionNeedExceptionObject;
+
+ if ((Pers == EHPersonality::GNU_CXX || Pers == EHPersonality::GNU_CXX_SjLj) &&
+ TargetTriple.isTargetEHABICompatible()) {
+ RewindName = TLI.getLibcallName(RTLIB::CXA_END_CLEANUP);
+ FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+ RewindFunctionCallingConv =
+ TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP);
+ DoesRewindFunctionNeedExceptionObject = false;
+ } else {
+ RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
+ FTy =
FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
+ RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME);
+ DoesRewindFunctionNeedExceptionObject = true;
}
+ RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
// Create the basic block where the _Unwind_Resume call will live.
if (ResumesLeft == 1) {
@@ -226,10 +240,14 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
ResumeInst *RI = Resumes.front();
BasicBlock *UnwindBB = RI->getParent();
Value *ExnObj = GetExceptionObject(RI);
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(ExnObj);
- // Call the _Unwind_Resume function.
- CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
- CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ // Call the rewind function.
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
// We never expect _Unwind_Resume to return.
CI->setDoesNotReturn();
@@ -240,6 +258,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
std::vector<DominatorTree::UpdateType> Updates;
Updates.reserve(Resumes.size());
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
UnwindBB);
@@ -257,9 +277,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
++NumResumesLowered;
}
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(PN);
+
// Call the function.
- CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
- CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
// We never expect _Unwind_Resume to return.
CI->setDoesNotReturn();
@@ -277,22 +301,20 @@ bool DwarfEHPrepare::run() {
return Changed;
}
-static bool prepareDwarfEH(CodeGenOpt::Level OptLevel,
- FunctionCallee &RewindFunction, Function &F,
+static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F,
const TargetLowering &TLI, DominatorTree *DT,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ const Triple &TargetTriple) {
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr,
- TTI)
+ return DwarfEHPrepare(OptLevel, F, TLI, DT ? &DTU : nullptr, TTI,
+ TargetTriple)
.run();
}
namespace {
class DwarfEHPrepareLegacyPass : public FunctionPass {
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee RewindFunction = nullptr;
CodeGenOpt::Level OptLevel;
@@ -315,7 +337,7 @@ public:
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
}
- return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
+ return prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM.getTargetTriple());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
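Summarizing the DwarfEHPrepare change above: instead of caching _Unwind_Resume in the pass, the rewind routine is now chosen per function. With a GNU C++ personality on an EHABI-compatible triple the pass calls __cxa_end_cleanup, which takes no exception object; otherwise it keeps calling _Unwind_Resume with the exception object, and in both cases the calling convention comes from the corresponding libcall. A condensed sketch of just that selection, using the same hooks as the patch; the struct and helper names are illustrative:

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"

// Which rewind routine to call, and whether it takes the exception object.
struct RewindChoice {
  const char *Name;
  llvm::CallingConv::ID CC;
  bool NeedsExceptionObject;
};

static RewindChoice pickRewind(llvm::EHPersonality Pers,
                               const llvm::TargetLowering &TLI,
                               const llvm::Triple &TT) {
  using namespace llvm;
  if ((Pers == EHPersonality::GNU_CXX ||
       Pers == EHPersonality::GNU_CXX_SjLj) &&
      TT.isTargetEHABICompatible())
    return {TLI.getLibcallName(RTLIB::CXA_END_CLEANUP),
            TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP),
            /*NeedsExceptionObject=*/false};
  return {TLI.getLibcallName(RTLIB::UNWIND_RESUME),
          TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME),
          /*NeedsExceptionObject=*/true};
}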
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 50fdc2114780..d0c2b8c267ff 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -348,17 +348,17 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
ConstantInt::get(Diff->getType(), 0));
BranchInst *CmpBr =
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates(
{{DominatorTree::Insert, BB, EndBlock},
{DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
- Builder.Insert(CmpBr);
} else {
// The last block has an unconditional branch to EndBlock.
BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
- Builder.Insert(CmpBr);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d909d6aa5b0a..7300ea6b50ee 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -189,12 +189,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end();
- mi != me;) {
- MachineInstr &MI = *mi;
- // Advance iterator here because MI may be erased.
- ++mi;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Only expand pseudos.
if (!MI.isPseudo())
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index a8d4d4ebe8bd..bb8d2b3e9a78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -158,6 +158,11 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// \brief Lower this VP reduction to a call to an unpredicated reduction
+ /// intrinsic.
+ Value *expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &PI);
+
/// \brief Query TTI and expand the vector predication in \p P accordingly.
Value *expandPredication(VPIntrinsic &PI);
@@ -248,6 +253,136 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
+ Type *EltTy) {
+ bool Negative = false;
+ unsigned EltBits = EltTy->getScalarSizeInBits();
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Expecting a VP reduction intrinsic");
+ case Intrinsic::vp_reduce_add:
+ case Intrinsic::vp_reduce_or:
+ case Intrinsic::vp_reduce_xor:
+ case Intrinsic::vp_reduce_umax:
+ return Constant::getNullValue(EltTy);
+ case Intrinsic::vp_reduce_mul:
+ return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
+ case Intrinsic::vp_reduce_and:
+ case Intrinsic::vp_reduce_umin:
+ return ConstantInt::getAllOnesValue(EltTy);
+ case Intrinsic::vp_reduce_smin:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMaxValue(EltBits));
+ case Intrinsic::vp_reduce_smax:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMinValue(EltBits));
+ case Intrinsic::vp_reduce_fmax:
+ Negative = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::vp_reduce_fmin: {
+ FastMathFlags Flags = VPI.getFastMathFlags();
+ const fltSemantics &Semantics = EltTy->getFltSemantics();
+ return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+ : !Flags.noInfs()
+ ? ConstantFP::getInfinity(EltTy, Negative)
+ : ConstantFP::get(EltTy,
+ APFloat::getLargest(Semantics, Negative));
+ }
+ case Intrinsic::vp_reduce_fadd:
+ return ConstantFP::getNegativeZero(EltTy);
+ case Intrinsic::vp_reduce_fmul:
+ return ConstantFP::get(EltTy, 1.0);
+ }
+}
+
+Value *
+CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &VPI) {
+ assert((isSafeToSpeculativelyExecute(&VPI) ||
+ VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ Value *Mask = VPI.getMaskParam();
+ Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());
+
+ // Insert neutral element in masked-out positions
+ if (Mask && !isAllTrueMask(Mask)) {
+ auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
+ auto *NeutralVector = Builder.CreateVectorSplat(
+ cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
+ RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
+ }
+
+ Value *Reduction;
+ Value *Start = VPI.getOperand(VPI.getStartParamPos());
+
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Impossible reduction kind");
+ case Intrinsic::vp_reduce_add:
+ Reduction = Builder.CreateAddReduce(RedOp);
+ Reduction = Builder.CreateAdd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_mul:
+ Reduction = Builder.CreateMulReduce(RedOp);
+ Reduction = Builder.CreateMul(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_and:
+ Reduction = Builder.CreateAndReduce(RedOp);
+ Reduction = Builder.CreateAnd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_or:
+ Reduction = Builder.CreateOrReduce(RedOp);
+ Reduction = Builder.CreateOr(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_xor:
+ Reduction = Builder.CreateXorReduce(RedOp);
+ Reduction = Builder.CreateXor(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmax:
+ Reduction = Builder.CreateFPMaxReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmin:
+ Reduction = Builder.CreateFPMinReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fadd:
+ Reduction = Builder.CreateFAddReduce(Start, RedOp);
+ break;
+ case Intrinsic::vp_reduce_fmul:
+ Reduction = Builder.CreateFMulReduce(Start, RedOp);
+ break;
+ }
+
+ replaceOperation(*Reduction, VPI);
+ return Reduction;
+}
+
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
@@ -321,6 +456,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (OC && Instruction::isBinaryOp(*OC))
return expandPredicationInBinaryOperator(Builder, VPI);
+ if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
+ return expandPredicationInReduction(Builder, *VPRI);
+
return &VPI;
}
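The new expandPredicationInReduction above lowers a vector-predicated reduction by first replacing masked-off lanes with the operation's neutral element (0 for add/or/xor/umax, all-ones for and/umin, 1 for mul, the signed extremes for smin/smax, and so on) and then running the ordinary unpredicated reduction, folding the start value in afterwards. A scalar C++ analogue of the neutral-element trick, with every name invented for the illustration:

#include <algorithm>
#include <climits>
#include <cstddef>

// Masked reductions via neutral elements: disabled lanes contribute the
// identity of the operation, so a plain unmasked loop gives the right answer.
int maskedReduceAdd(const int *V, const bool *M, size_t N, int Start) {
  int R = 0; // neutral element of +
  for (size_t I = 0; I < N; ++I)
    R += M[I] ? V[I] : 0;
  return R + Start; // start value folded in after the reduction
}

int maskedReduceSMin(const int *V, const bool *M, size_t N, int Start) {
  int R = INT_MAX; // neutral element of signed min
  for (size_t I = 0; I < N; ++I)
    R = std::min(R, M[I] ? V[I] : INT_MAX);
  return std::min(R, Start);
}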
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index e3c4e86d203b..ec6bf18b2769 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -1,9 +1,8 @@
//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index 8fae798b31d9..af5515cc6bfd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -145,24 +145,9 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
if (NMI != GCStrategyMap.end())
return NMI->getValue();
- for (auto& Entry : GCRegistry::entries()) {
- if (Name == Entry.getName()) {
- std::unique_ptr<GCStrategy> S = Entry.instantiate();
- S->Name = std::string(Name);
- GCStrategyMap[Name] = S.get();
- GCStrategyList.push_back(std::move(S));
- return GCStrategyList.back().get();
- }
- }
-
- if (GCRegistry::begin() == GCRegistry::end()) {
- // In normal operation, the registry should not be empty. There should
- // be the builtin GCs if nothing else. The most likely scenario here is
- // that we got here without running the initializers used by the Registry
- // itself and it's registration mechanism.
- const std::string error = ("unsupported GC: " + Name).str() +
- " (did you remember to link and initialize the CodeGen library?)";
- report_fatal_error(error);
- } else
- report_fatal_error(std::string("unsupported GC: ") + Name);
+ std::unique_ptr<GCStrategy> S = llvm::getGCStrategy(Name);
+ S->Name = std::string(Name);
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 58269e172c57..637a877810a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -193,8 +193,8 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
bool MadeChange = false;
for (BasicBlock &BB : F)
- for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
- IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I);
if (!CI)
continue;
@@ -271,16 +271,15 @@ void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
- for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end();
- MI != ME; ++MI)
- if (MI->isCall()) {
+ for (MachineInstr &MI : MBB)
+ if (MI.isCall()) {
// Do not treat tail or sibling call sites as safe points. This is
// legal since any arguments passed to the callee which live in the
// remnants of the callers frame will be owned and updated by the
// callee if required.
- if (MI->isTerminator())
+ if (MI.isTerminator())
continue;
- VisitCallPoint(MI);
+ VisitCallPoint(&MI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index dd560e8ff145..2676becdd807 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -13,6 +13,8 @@
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -187,6 +189,14 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
// Try to constant fold these.
assert(SrcOps.size() == 2 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+ // Try to constant fold vector constants.
+ auto VecCst = ConstantFoldVectorBinop(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
+ if (VecCst)
+ return MachineInstrBuilder(getMF(), *VecCst);
+ break;
+ }
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
SrcOps[1].getReg(), *getMRI()))
return buildConstant(DstOps[0], *Cst);
@@ -213,6 +223,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildFConstant(DstOps[0], *Cst);
break;
}
+ case TargetOpcode::G_CTLZ: {
+ assert(SrcOps.size() == 1 && "Expected one source");
+ assert(DstOps.size() == 1 && "Expected one dest");
+ auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
+ if (!MaybeCsts)
+ break;
+ if (MaybeCsts->size() == 1)
+ return buildConstant(DstOps[0], (*MaybeCsts)[0]);
+ // This was a vector constant. Build a G_BUILD_VECTOR for them.
+ SmallVector<Register> ConstantRegs;
+ LLT VecTy = DstOps[0].getLLTTy(*getMRI());
+ for (unsigned Cst : *MaybeCsts)
+ ConstantRegs.emplace_back(
+ buildConstant(VecTy.getScalarType(), Cst).getReg(0));
+ return buildBuildVector(DstOps[0], ConstantRegs);
+ }
}
bool CanCopy = checkCopyToDefsPossible(DstOps);
if (!canPerformCSEForOpc(Opc))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d2cda9ece31a..17094a8e44f8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -73,7 +74,7 @@ void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
const AttributeList &Attrs,
unsigned OpIdx) const {
addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
- return Attrs.hasAttribute(OpIdx, Attr);
+ return Attrs.hasAttributeAtIndex(OpIdx, Attr);
});
}
@@ -139,6 +140,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
+ Info.CB = &CB;
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
@@ -165,18 +167,21 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
Align MemAlign = DL.getABITypeAlign(Arg.Ty);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
assert(OpIdx >= AttributeList::FirstArgIndex);
- Type *ElementTy = PtrTy->getElementType();
+ unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;
- auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
- Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+ Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
+ assert(ElementTy && "Must have byval, inalloca or preallocated type");
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
- if (auto ParamAlign =
- FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
+ if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
MemAlign = *ParamAlign;
- else if ((ParamAlign =
- FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex)))
+ else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
MemAlign = *ParamAlign;
else
MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
@@ -613,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const unsigned NumArgs = Args.size();
+ // Stores thunks for outgoing register assignments. This is used so we delay
+ // generating register copies until mem loc assignments are done. We do this
+ // so that if the target is using the delayed stack protector feature, we can
+ // find the split point of the block accurately. E.g. if we have:
+ // G_STORE %val, %memloc
+ // $x0 = COPY %foo
+ // $x1 = COPY %bar
+ // CALL func
+ // ... then the split point for the block will correctly be at, and including,
+ // the copy to $x0. If instead the G_STORE instruction immediately precedes
+ // the CALL, then we'd prematurely choose the CALL as the split point, thus
+ // generating a split block with a CALL that uses undefined physregs.
+ SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;
+
for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
if (VA.needsCustom()) {
- unsigned NumArgRegs =
- Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ std::function<void()> Thunk;
+ unsigned NumArgRegs = Handler.assignCustomValue(
+ Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk);
+ if (Thunk)
+ DelayedOutgoingRegAssignments.emplace_back(Thunk);
if (!NumArgRegs)
return false;
j += NumArgRegs;
@@ -739,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ });
+ }
}
// Now that all pieces have been assigned, re-pack the register typed values
@@ -753,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
j += NumParts - 1;
}
+ for (auto &Fn : DelayedOutgoingRegAssignments)
+ Fn();
return true;
}
@@ -1153,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
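The CallLowering change above wraps each outgoing physical-register copy in a thunk and replays the thunks only after all stack-memory assignments have been emitted, so that targets using the delayed stack-protector feature see the first register copy, not an earlier G_STORE, as the block split point. A generic sketch of that defer-and-flush pattern; the class and method names are hypothetical:

#include <functional>
#include <utility>
#include <vector>

// Queue side effects now, emit the bulk of the work eagerly, then replay the
// queued pieces in order, mirroring DelayedOutgoingRegAssignments above.
class DeferredWork {
  std::vector<std::function<void()>> Thunks;

public:
  void defer(std::function<void()> Fn) { Thunks.emplace_back(std::move(Fn)); }
  void flush() {
    for (auto &Fn : Thunks)
      Fn();
    Thunks.clear();
  }
};

In the patch, memory-location assignments are handled eagerly inside the argument loop, while each assignValueToReg call on the outgoing path is wrapped in a lambda and flushed once all pieces have been assigned.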
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 6f103bca6892..381c6df5c97a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -130,16 +130,15 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
WrapperObserver.addObserver(CSEInfo);
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
- for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) {
- MachineInstr *CurMI = &*MII;
- ++MII;
+ for (MachineInstr &CurMI :
+ llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// Erase dead insts before even adding to the list.
- if (isTriviallyDead(*CurMI, *MRI)) {
- LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n");
- CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
+ if (isTriviallyDead(CurMI, *MRI)) {
+ LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
+ CurMI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
- WorkList.deferred_insert(CurMI);
+ WorkList.deferred_insert(&CurMI);
}
}
WorkList.finalize();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 06d827de2e96..3a52959d54bf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -12,9 +12,11 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,8 +28,10 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetMachine.h"
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -46,8 +50,9 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
const LegalizerInfo *LI)
- : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB), MDT(MDT), LI(LI) {
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
+ MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
+ TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
(void)this->KB;
}
@@ -64,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
return I;
}
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
+static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(V);
+ auto Ctlz = MIB.buildCTLZ(Ty, V);
+ auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+ return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
+}
+
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
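The buildLogBase2 helper added above materializes log2 of a known power of two as (EltBits - 1) - ctlz(V), built from a G_CONSTANT, a G_CTLZ and a G_SUB, and it works per lane for vectors as well. A standalone check of the identity for 32-bit values, not taken from the patch:

#include "llvm/ADT/APInt.h"
#include <cassert>

// For V = 1 << Shift in a 32-bit lane, (32 - 1) - ctlz(V) recovers Shift.
void logBase2Demo() {
  for (unsigned Shift = 0; Shift < 32; ++Shift) {
    llvm::APInt V(32, 1ull << Shift);
    assert(31 - V.countLeadingZeros() == Shift);
  }
}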
@@ -143,6 +158,24 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
Observer.changedInstr(*FromRegOp.getParent());
}
+void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
+ unsigned ToOpcode) const {
+ Observer.changingInstr(FromMI);
+
+ FromMI.setDesc(Builder.getTII().get(ToOpcode));
+
+ Observer.changedInstr(FromMI);
+}
+
+const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
+ return RBI->getRegBank(Reg, MRI, *TRI);
+}
+
+void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
+ if (RegBank)
+ MRI.setRegBank(Reg, *RegBank);
+}
+
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (matchCombineCopy(MI)) {
applyCombineCopy(MI);
@@ -486,10 +519,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
continue;
// Check for legality.
if (LI) {
- LegalityQuery::MemDesc MMDesc;
- MMDesc.MemoryTy = MMO.getMemoryType();
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
+ LegalityQuery::MemDesc MMDesc(MMO);
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
@@ -623,13 +653,83 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
+bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // If we have the following code:
+ // %mask = G_CONSTANT 255
+ // %ld = G_LOAD %ptr, (load s16)
+ // %and = G_AND %ld, %mask
+ //
+ // Try to fold it into
+ // %ld = G_ZEXTLOAD %ptr, (load s8)
+
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI.getType(Dst).isVector())
+ return false;
+
+ auto MaybeMask =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeMask)
+ return false;
+
+ APInt MaskVal = MaybeMask->Value;
+
+ if (!MaskVal.isMask())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
+ !LoadMI->isSimple())
+ return false;
+
+ Register LoadReg = LoadMI->getDstReg();
+ LLT LoadTy = MRI.getType(LoadReg);
+ Register PtrReg = LoadMI->getPointerReg();
+ uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+ unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+
+ // The mask may not be larger than the in-memory type, as it might cover sign
+ // extended bits
+ if (MaskSizeBits > LoadSizeBits)
+ return false;
+
+ // If the mask covers the whole destination register, there's nothing to
+ // extend
+ if (MaskSizeBits >= LoadTy.getSizeInBits())
+ return false;
+
+ // Most targets cannot deal with loads of size < 8 and need to re-legalize to
+ // at least byte loads. Avoid creating such loads here
+ if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
+ return false;
+
+ const MachineMemOperand &MMO = LoadMI->getMMO();
+ LegalityQuery::MemDesc MemDesc(MMO);
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*LoadMI);
+ auto &MF = B.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+ B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ };
+ return true;
+}
+
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
const MachineInstr &UseMI) {
assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
"shouldn't consider debug uses");
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
- return false;
+ return true;
const MachineBasicBlock &MBB = *DefMI.getParent();
auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
return &MI == &DefMI || &MI == &UseMI;
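The matchCombineLoadWithAndMask hunk above folds G_AND of a load with a low-bit mask into a narrower G_ZEXTLOAD when the mask width satisfies the checks spelled out in its comments (no wider than the in-memory size, narrower than the destination, at least a byte and a power of two) and the resulting zext-load is legal. A small sketch of just the size reasoning; the concrete numbers in the demo are examples, not from the patch:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

// Mirror the size checks that gate the G_AND(load, mask) -> G_ZEXTLOAD fold.
bool canNarrowToZextLoad(const llvm::APInt &Mask, unsigned MemSizeBits,
                         unsigned DstSizeBits) {
  if (!Mask.isMask())
    return false;
  unsigned MaskBits = Mask.countTrailingOnes();
  return MaskBits <= MemSizeBits && // must not cover possibly-extended bits
         MaskBits < DstSizeBits &&  // otherwise there is nothing to extend
         MaskBits >= 8 && llvm::isPowerOf2_32(MaskBits);
}

void zextLoadFoldDemo() {
  // G_AND of a (load s16) result with 255 can become a zero-extending (load s8).
  assert(canNarrowToZextLoad(llvm::APInt(32, 0xFF), 16, 32));
  // A 16-bit mask over a 16-bit value loaded into s16 leaves nothing to extend.
  assert(!canNarrowToZextLoad(llvm::APInt(16, 0xFFFF), 16, 16));
}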
@@ -711,6 +811,16 @@ bool CombinerHelper::matchSextInRegOfLoad(
// anyway for most targets.
if (!isPowerOf2_32(NewSizeBits))
return false;
+
+ const MachineMemOperand &MMO = LoadDef->getMMO();
+ LegalityQuery::MemDesc MMDesc(MMO);
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
+ {MRI.getType(LoadDef->getDstReg()),
+ MRI.getType(LoadDef->getPointerReg())},
+ {MMDesc}}))
+ return false;
+
MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
return true;
}
@@ -1093,81 +1203,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
- // On Darwin, -Os means optimize for size without hurting performance, so
- // only really optimize for size when -Oz (MinSize) is used.
- if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
-}
-
-// Returns a list of types to use for memory op lowering in MemOps. A partial
-// port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
- unsigned Limit, const MemOp &Op,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- const TargetLowering &TLI) {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
- return false;
-
- LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
-
- if (Ty == LLT()) {
- // Use the largest scalar type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater or
- // equal to DstAlign (or zero).
- Ty = LLT::scalar(64);
- if (Op.isFixedDstAlign())
- while (Op.getDstAlign() < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
- Ty = LLT::scalar(Ty.getSizeInBytes());
- assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
- // FIXME: check for the largest legal type we can load/store to.
- }
-
- unsigned NumMemOps = 0;
- uint64_t Size = Op.size();
- while (Size) {
- unsigned TySize = Ty.getSizeInBytes();
- while (TySize > Size) {
- // For now, only use non-vector load / store's for the left-over pieces.
- LLT NewTy = Ty;
- // FIXME: check for mem op safety and legality of the types. Not all of
- // SDAGisms map cleanly to GISel concepts.
- if (NewTy.isVector())
- NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
- unsigned NewTySize = NewTy.getSizeInBytes();
- assert(NewTySize > 0 && "Could not find appropriate type");
-
- // If the new LLT cannot cover all of the remaining bits, then consider
- // issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
- // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
- MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
- TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
- MachineMemOperand::MONone, &Fast) &&
- Fast)
- TySize = Size;
- else {
- Ty = NewTy;
- TySize = NewTySize;
- }
- }
-
- if (++NumMemOps > Limit)
- return false;
-
- MemOps.push_back(Ty);
- Size -= TySize;
- }
-
- return true;
-}
-
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
@@ -1175,460 +1210,20 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
return IntegerType::get(C, Ty.getSizeInBits());
}
-// Get a vectorized representation of the memset value operand, GISel edition.
-static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NumBits = Ty.getScalarSizeInBits();
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
- APInt SplatVal = APInt::getSplat(NumBits, Scalar);
- return MIB.buildConstant(Ty, SplatVal).getReg(0);
- }
-
- // Extend the byte value to the larger type, and then multiply by a magic
- // value 0x010101... in order to replicate it across every byte.
- // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
- if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
- return MIB.buildConstant(Ty, 0).getReg(0);
- }
-
- LLT ExtType = Ty.getScalarType();
- auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
- if (NumBits > 8) {
- APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- auto MagicMI = MIB.buildConstant(ExtType, Magic);
- Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
- }
-
- // For vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- Val = MIB.buildSplatVector(Ty, Val).getReg(0);
-
- return Val;
-}
-
-bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
- Register Val, uint64_t KnownLen,
- Align Alignment, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memset length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
-
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
-
- if (!findGISelOptimalMemOpLowering(MemOps, Limit,
- MemOp::Set(KnownLen, DstAlignCanChange,
- Alignment,
- /*IsZeroMemset=*/IsZeroVal,
- /*IsVolatile=*/IsVolatile),
- DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- MachineIRBuilder MIB(MI);
- // Find the largest store and generate the bit pattern for it.
- LLT LargestTy = MemOps[0];
- for (unsigned i = 1; i < MemOps.size(); i++)
- if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
- LargestTy = MemOps[i];
-
- // The memset stored value is always defined as an s8, so in order to make it
- // work with larger store types we need to repeat the bit pattern across the
- // wider type.
- Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
-
- if (!MemSetValue)
- return false;
-
- // Generate the stores. For each store type in the list, we generate the
- // matching store of that type to the destination address.
- LLT PtrTy = MRI.getType(Dst);
- unsigned DstOff = 0;
- unsigned Size = KnownLen;
- for (unsigned I = 0; I < MemOps.size(); I++) {
- LLT Ty = MemOps[I];
- unsigned TySize = Ty.getSizeInBytes();
- if (TySize > Size) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- assert(I == MemOps.size() - 1 && I != 0);
- DstOff -= TySize - Size;
- }
-
- // If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
- Register Value = MemSetValue;
- if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
- MVT VT = getMVTForLLT(Ty);
- MVT LargestVT = getMVTForLLT(LargestTy);
- if (!LargestTy.isVector() && !Ty.isVector() &&
- TLI.isTruncateFree(LargestVT, VT))
- Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
- else
- Value = getMemsetValue(Val, Ty, MIB);
- if (!Value)
- return false;
- }
-
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
-
- Register Ptr = Dst;
- if (DstOff != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
-
- MIB.buildStore(Value, Ptr, *StoreMMO);
- DstOff += Ty.getSizeInBytes();
- Size -= TySize;
- }
-
- MI.eraseFromParent();
- return true;
-}
-
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
-
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- const auto *MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
- bool IsVolatile = MemOp->isVolatile();
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- // FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
- "inline memcpy with dynamic size is not yet supported");
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- Align DstAlign = DstMMO.getBaseAlign();
- Align SrcAlign = SrcMMO.getBaseAlign();
-
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-}
-
-bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- return optimizeMemcpy(MI, Dst, Src, KnownLen,
- std::numeric_limits<uint64_t>::max(), DstAlign,
- SrcAlign, IsVolatile);
-}
-
-bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memcpy length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- // FIXME: infer better src pointer alignment like SelectionDAG does here.
- // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
- // if the memcpy is in a tail call position.
-
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- IsVolatile),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Now we need to emit a pair of load and stores for each of the types we've
- // collected. I.e. for each type, generate a load from the source pointer of
- // that type width, and then generate a corresponding store to the dest buffer
- // of that value loaded. This can result in a sequence of loads and stores
- // mixed types, depending on what the target specifies as good types to use.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- unsigned Size = KnownLen;
- for (auto CopyTy : MemOps) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- if (CopyTy.getSizeInBytes() > Size)
- CurrOffset -= CopyTy.getSizeInBytes() - Size;
-
- // Construct MMOs for the accesses.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- Register Offset;
- if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
- .getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
-
- // Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- MIB.buildStore(LdVal, StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- Size -= CopyTy.getSizeInBytes();
- }
-
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memmove length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
- // to a bug in it's findOptimalMemOpLowering implementation. For now do the
- // same thing here.
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- /*IsVolatile*/ true),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Memmove requires that we perform the loads first before issuing the stores.
- // Apart from that, this loop is pretty much doing the same thing as the
- // memcpy codegen function.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- SmallVector<Register, 16> LoadVals;
- for (auto CopyTy : MemOps) {
- // Construct MMO for the load.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
- CurrOffset += CopyTy.getSizeInBytes();
- }
-
- CurrOffset = 0;
- for (unsigned I = 0; I < MemOps.size(); ++I) {
- LLT CopyTy = MemOps[I];
- // Now store the values loaded.
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- Register StorePtr = Dst;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- }
- MI.eraseFromParent();
- return true;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemcpyInline(MI) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
- const unsigned Opc = MI.getOpcode();
- // This combine is fairly complex so it's not written with a separate
- // matcher function.
- assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
- Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
-
- auto MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
-
- Align DstAlign = MemOp->getBaseAlign();
- Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- if (Opc != TargetOpcode::G_MEMSET) {
- assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
- MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlign();
- }
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- if (!LenVRegAndVal)
- return false; // Leave it to the legalizer to lower it to a libcall.
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
-
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- bool IsVolatile = MemOp->isVolatile();
- if (Opc == TargetOpcode::G_MEMCPY_INLINE)
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-
- // Don't try to optimize volatile.
- if (IsVolatile)
- return false;
-
- if (MaxLen && KnownLen > MaxLen)
- return false;
-
- if (Opc == TargetOpcode::G_MEMCPY) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
- return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
- IsVolatile);
- }
- if (Opc == TargetOpcode::G_MEMMOVE)
- return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMSET)
- return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
- return false;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
@@ -1706,30 +1301,52 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
Register Add2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
- // Don't do this combine if there multiple uses of the first PTR_ADD,
- // since we may be able to compute the second PTR_ADD as an immediate
- // offset anyway. Folding the first offset into the second may cause us
- // to go beyond the bounds of our legal addressing modes.
- if (!MRI.hasOneNonDBGUse(Add2))
- return false;
-
- MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
+ MachineInstr *Add2Def = MRI.getVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
Register Base = Add2Def->getOperand(1).getReg();
Register Imm2 = Add2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
+ // Check if the new combined immediate forms an illegal addressing mode.
+ // Do not combine if it was legal before but would get illegal.
+ // To do so, we need to find a load/store user of the pointer to get
+ // the access type.
+ Type *AccessTy = nullptr;
+ auto &MF = *MI.getMF();
+ for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
+ if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
+ AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
+ MF.getFunction().getContext());
+ break;
+ }
+ }
+ TargetLoweringBase::AddrMode AMNew;
+ APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ AMNew.BaseOffs = CombinedImm.getSExtValue();
+ if (AccessTy) {
+ AMNew.HasBaseReg = true;
+ TargetLoweringBase::AddrMode AMOld;
+ AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.HasBaseReg = true;
+ unsigned AS = MRI.getType(Add2).getAddressSpace();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
+ !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
+ return false;
+ }
+
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Imm = AMNew.BaseOffs;
MatchInfo.Base = Base;
+ MatchInfo.Bank = getRegBank(Imm2);
return true;
}
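// Illustrative sketch, not part of the patch: why matchPtrAddImmedChain now
// re-checks the combined immediate against the addressing mode. Two offsets
// that are each encodable may sum to one that is not; the +/-255 window below
// is a hypothetical target limit chosen only for this example.
#include <cassert>
#include <cstdint>

static bool isLegalImmOffset(int64_t Off) { return Off >= -255 && Off <= 255; }

int main() {
  const int64_t InnerImm = 200, OuterImm = 120;
  assert(isLegalImmOffset(InnerImm) && isLegalImmOffset(OuterImm));
  assert(!isLegalImmOffset(InnerImm + OuterImm)); // 320: combine is rejected
  return 0;
}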
@@ -1739,6 +1356,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
MachineIRBuilder MIB(MI);
LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
@@ -1762,7 +1380,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Shl2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
@@ -1772,7 +1390,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Base = Shl2Def->getOperand(1).getReg();
Register Imm2 = Shl2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
@@ -1856,7 +1474,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Find a matching one-use shift by constant.
const Register C1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
if (!MaybeImmVal)
return false;
@@ -1870,7 +1488,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Must be a constant.
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1932,8 +1550,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
// These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParent();
- MatchInfo.Logic->eraseFromParent();
+ MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
+ MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
MI.eraseFromParent();
}
@@ -1942,7 +1560,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1977,7 +1595,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
// TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -2045,26 +1663,23 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
- Register SrcReg =
- peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
- MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
- if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
- SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
- SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
+ if (!SrcInstr)
return false;
// Check the source type of the merge.
- LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
- LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
+ LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
if (SrcMergeTy != Dst0Ty && !SameSize)
return false;
// They are the same now (modulo a bitcast).
// We can collect all the src registers.
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
- ++Idx)
- Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
+ Operands.push_back(SrcInstr->getSourceReg(Idx));
return true;
}
@@ -2241,7 +1856,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
return false;
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -2410,12 +2025,12 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register LHS = PtrAdd.getBaseReg();
+ Register RHS = PtrAdd.getOffsetReg();
MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
- if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {
int64_t Cst;
if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
NewCst = Cst + *RHSCst;
@@ -2428,12 +2043,12 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register Dst = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register Dst = PtrAdd.getReg(0);
Builder.setInstrAndDebugLoc(MI);
Builder.buildConstant(Dst, NewCst);
- MI.eraseFromParent();
+ PtrAdd.eraseFromParent();
}
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
@@ -2536,6 +2151,23 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}
+bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Src = MI.getOperand(1).getReg();
+ Register NegSrc;
+
+ if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NegSrc);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2587,7 +2219,7 @@ bool CombinerHelper::matchCombineTruncOfShl(
{DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
KnownBits Known = KB->getKnownBits(ShiftAmt);
unsigned Size = DstTy.getSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
return true;
}
@@ -2644,13 +2276,13 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
}
bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
- assert(MI.getOpcode() == TargetOpcode::G_SELECT);
- if (auto MaybeCstCmp =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
- OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
- return true;
- }
- return false;
+ GSelect &SelMI = cast<GSelect>(MI);
+ auto Cst =
+ isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
+ if (!Cst)
+ return false;
+ OpIdx = Cst->isZero() ? 3 : 2;
+ return true;
}
bool CombinerHelper::eraseInst(MachineInstr &MI) {
@@ -2662,12 +2294,14 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
if (!MOP1.isReg() || !MOP2.isReg())
return false;
- MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
- if (!I1)
+ auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
+ if (!InstAndDef1)
return false;
- MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
- if (!I2)
+ auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
+ if (!InstAndDef2)
return false;
+ MachineInstr *I1 = InstAndDef1->MI;
+ MachineInstr *I2 = InstAndDef2->MI;
// Handle a case like this:
//
@@ -2727,15 +2361,26 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
//
// On the off-chance that there's some target instruction feeding into the
// instruction, let's use produceSameValue instead of isIdenticalTo.
- return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+ if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
+ // Handle instructions with multiple defs that produce the same values. The
+ // values are the same for operands with the same index.
+ // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // I1 and I2 are different instructions but produce the same values:
+ // %1 and %6 are the same value, while %1 and %7 are not.
+ return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
+ I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+ }
+ return false;
}
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
if (!MOP.isReg())
return false;
- // MIPatternMatch doesn't let us look through G_ZEXT etc.
- auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
- return ValAndVReg && ValAndVReg->Value == C;
+ auto *MI = MRI.getVRegDef(MOP.getReg());
+ auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
+ return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+ MaybeCst->getSExtValue() == C;
}
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
@@ -3115,14 +2760,14 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
//
// Check if we can replace AndDst with the LHS of the G_AND
if (canReplaceReg(AndDst, LHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace AndDst with the RHS of the G_AND
if (canReplaceReg(AndDst, RHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3161,14 +2806,14 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
//
// Check if we can replace OrDst with the LHS of the G_OR
if (canReplaceReg(OrDst, LHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace OrDst with the RHS of the G_OR
if (canReplaceReg(OrDst, RHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3346,7 +2991,8 @@ void CombinerHelper::applyXorOfAndWithSameReg(
}
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register DstReg = PtrAdd.getReg(0);
LLT Ty = MRI.getType(DstReg);
const DataLayout &DL = Builder.getMF().getDataLayout();
@@ -3354,20 +3000,20 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
return false;
if (Ty.isPointer()) {
- auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
return ConstVal && *ConstVal == 0;
}
assert(Ty.isVector() && "Expecting a vector type");
- const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
return isBuildVectorAllZeros(*VecMI, MRI);
}
void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
- MI.eraseFromParent();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Builder.setInstrAndDebugLoc(PtrAdd);
+ Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
+ PtrAdd.eraseFromParent();
}
/// The second source operand is known to be a power of 2.
@@ -3704,10 +3350,8 @@ bool CombinerHelper::matchLoadOrCombine(
// may not use index 0.
Register Ptr = LowestIdxLoad->getPointerReg();
const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
- LegalityQuery::MemDesc MMDesc;
+ LegalityQuery::MemDesc MMDesc(MMO);
MMDesc.MemoryTy = Ty;
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
return false;
@@ -3732,6 +3376,274 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return None;
+
+ // The shift amount must be a constant multiple of the narrow type's width.
+ // It translates to the element offset into the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return None;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return None;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return None;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return None;
+ return Offset;
+}
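// Illustrative sketch, not part of the patch: the value returned above is the
// shift amount divided by the narrow store width, i.e. the element index of
// the piece within the wide value. For an s32 value stored through s8
// truncstores, trunc(Val >> 16) is element 16 / 8 == 2.
#include <cassert>

int main() {
  const unsigned NarrowBits = 8;
  const long long ShiftAmt = 16;
  assert(ShiftAmt % NarrowBits == 0); // shift must cover whole elements
  assert(ShiftAmt / NarrowBits == 2); // third byte of the wide value
  return 0;
}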
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+ auto &StoreMI = cast<GStore>(MI);
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ // Search the block up for more stores.
+ // We use a search threshold of 10 instructions here because the combiner
+ // works top-down within a block, and we don't want to search an unbounded
+ // number of predecessor instructions trying to find matching stores.
+ // If we moved this optimization into a separate pass then we could probably
+ // use a more efficient search without having a hard-coded threshold.
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // bail out if this byte offset has already been seen.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ return false;
+ }
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = getTargetLowering().allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresRequired; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
+ return false;
+
+ MatchInfo.NeedBSwap = NeedBswap;
+ MatchInfo.NeedRotate = NeedRotate;
+ MatchInfo.LowestIdxStore = LowestIdxStore;
+ MatchInfo.WideSrcVal = WideSrcVal;
+ MatchInfo.FoundStores = std::move(FoundStores);
+ return true;
+}
+
+void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+
+ Builder.setInstrAndDebugLoc(MI);
+ Register WideSrcVal = MatchInfo.WideSrcVal;
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+
+ if (MatchInfo.NeedBSwap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (MatchInfo.NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
+ MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
+ MatchInfo.LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : MatchInfo.FoundStores)
+ ST->eraseFromParent();
+}
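// Illustrative sketch, not part of the patch: a runtime check of the
// equivalence the match/apply pair above relies on. It assumes a
// little-endian host, matching the first example in the doc comment; the
// byte-reversed store order would need the G_BSWAP path instead.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t Val = 0x11223344u;
  uint8_t Narrow[4], Wide[4];
  for (int I = 0; I < 4; ++I)
    Narrow[I] = uint8_t(Val >> (8 * I)); // p[i] = (val >> 8*i) & 0xFF
  std::memcpy(Wide, &Val, 4);            // *((i32 *)p) = val
  assert(std::memcmp(Narrow, Wide, 4) == 0);
  return 0;
}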
+
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -3844,7 +3756,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
{TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
return false;
- auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
return false;
@@ -3917,7 +3829,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
MRI.use_instr_nodbg_end())) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
- auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
unsigned Idx = Cst.getValue().getZExtValue();
@@ -4064,6 +3976,78 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchICmpToLHSKnownBits(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // Given:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP ne %x, 0
+ //
+ // Or:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP eq %x, 1
+ //
+ // We can replace %cmp with %x assuming true is 1 on the target.
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (!CmpInst::isEquality(Pred))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+ /* IsFP = */ false) != 1)
+ return false;
+ int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+ return false;
+ Register LHS = MI.getOperand(2).getReg();
+ auto KnownLHS = KB->getKnownBits(LHS);
+ if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+ return false;
+ // Make sure replacing Dst with the LHS is a legal operation.
+ LLT LHSTy = MRI.getType(LHS);
+ unsigned LHSSize = LHSTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Op = TargetOpcode::COPY;
+ if (DstSize != LHSSize)
+ Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+ if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+ return true;
+}
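// Illustrative sketch, not part of the patch: when %x is already known to be
// 0 or 1 and the target's "true" value is 1, both compare forms reduce to x.
#include <cassert>

int main() {
  for (int X = 0; X <= 1; ++X) { // x is known to be 0 or 1
    assert((X != 0) == X);       // G_ICMP ne %x, 0  ->  %x
    assert((X == 1) == X);       // G_ICMP eq %x, 1  ->  %x
  }
  return 0;
}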
+
+// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
+bool CombinerHelper::matchAndOrDisjointMask(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // Ignore vector types to simplify matching the two constants.
+ // TODO: do this for vectors and scalars via a demanded bits analysis.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector())
+ return false;
+
+ Register Src;
+ int64_t MaskAnd;
+ int64_t MaskOr;
+ if (!mi_match(MI, MRI,
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+ return false;
+
+ // Check if MaskOr could turn on any bits in Src.
+ if (MaskAnd & MaskOr)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Src);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
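// Illustrative sketch, not part of the patch: if the two constants are
// disjoint, the OR cannot set any bit that survives the AND, so it is dead.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t MaskOr = 0xFF000000u, MaskAnd = 0x0000FFFFu;
  assert((MaskAnd & MaskOr) == 0); // the precondition checked above
  for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
    assert(((X | MaskOr) & MaskAnd) == (X & MaskAnd));
  return 0;
}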
+
/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
@@ -4130,6 +4114,104 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
+bool CombinerHelper::matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+
+ const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SBFX
+ : TargetOpcode::G_UBFX;
+
+ // Check if the type we would use for the extract is legal
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+ return false;
+
+ Register ShlSrc;
+ int64_t ShrAmt;
+ int64_t ShlAmt;
+ const unsigned Size = Ty.getScalarSizeInBits();
+
+ // Try to match shr (shl x, c1), c2
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ // Make sure that the shift sizes can fit a bitfield extract
+ if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+ return false;
+
+ // Skip this combine if the G_SEXT_INREG combine could handle it
+ if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+ return false;
+
+ // Calculate start position and width of the extract
+ const int64_t Pos = ShrAmt - ShlAmt;
+ const int64_t Width = Size - ShrAmt;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+ };
+ return true;
+}
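// Illustrative sketch, not part of the patch: the unsigned form of the
// combine above. lshr(shl(x, ShlAmt), ShrAmt) with ShlAmt <= ShrAmt extracts
// Width = Size - ShrAmt bits starting at Pos = ShrAmt - ShlAmt.
#include <cassert>
#include <cstdint>

static uint32_t UBFX32(uint32_t X, unsigned Pos, unsigned Width) {
  return (X >> Pos) & (Width == 32 ? ~0u : (1u << Width) - 1);
}

int main() {
  const unsigned ShlAmt = 5, ShrAmt = 11, Size = 32;
  for (uint32_t X : {0u, 0x12345678u, 0xFFFFFFFFu})
    assert(((X << ShlAmt) >> ShrAmt) ==
           UBFX32(X, ShrAmt - ShlAmt, Size - ShrAmt));
  return 0;
}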
+
+bool CombinerHelper::matchBitfieldExtractFromShrAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
+ TargetOpcode::G_UBFX, Ty, Ty))
+ return false;
+
+ // Try to match shr (and x, c1), c2
+ Register AndSrc;
+ int64_t ShrAmt;
+ int64_t SMask;
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (ShrAmt < 0 || ShrAmt >= Size)
+ return false;
+
+ // Check that ubfx can do the extraction, with no holes in the mask.
+ uint64_t UMask = SMask;
+ UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
+ UMask &= maskTrailingOnes<uint64_t>(Size);
+ if (!isMask_64(UMask))
+ return false;
+
+ // Calculate start position and width of the extract.
+ const int64_t Pos = ShrAmt;
+ const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+
+ // It's preferable to keep the shift, rather than form G_SBFX.
+ // TODO: remove the G_AND via demanded bits analysis.
+ if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(Ty, Width);
+ auto PosCst = B.buildConstant(Ty, Pos);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
+ };
+ return true;
+}
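// Illustrative sketch, not part of the patch: with a contiguous mask the
// shifted AND is exactly a constant unsigned bitfield extract. SMask and
// ShrAmt below are example values giving Pos = 8 and Width = 16.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SMask = 0x00FFFF00u;
  const unsigned ShrAmt = 8, Width = 16;
  for (uint32_t X : {0u, 0xA1B2C3D4u, 0xFFFFFFFFu})
    assert(((X & SMask) >> ShrAmt) == ((X >> ShrAmt) & ((1u << Width) - 1)));
  return 0;
}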
+
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
MachineInstr &PtrAdd) {
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
@@ -4144,10 +4226,10 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
if (MRI.hasOneNonDBGUse(Src1Reg))
return false;
- auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
if (!C1)
return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
if (!C2)
return false;
@@ -4198,9 +4280,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
return false;
}
-bool CombinerHelper::matchReassocPtrAdd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
+ // if and only if (G_PTR_ADD X, C) has one use.
+ Register LHSBase;
+ Optional<ValueAndVReg> LHSCstOff;
+ if (!mi_match(MI.getBaseReg(), MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
+ return false;
+
+ auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ // When we change LHSPtrAdd's offset register we might cause it to use a reg
+ // before its def. Sink the instruction to just before the outer PTR_ADD to
+ // ensure this doesn't happen.
+ LHSPtrAdd->moveBefore(&MI);
+ Register RHSReg = MI.getOffsetReg();
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(LHSCstOff->VReg);
+ Observer.changedInstr(MI);
+ Observer.changingInstr(*LHSPtrAdd);
+ LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ Observer.changedInstr(*LHSPtrAdd);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
+ if (!LHSPtrAdd)
+ return false;
+
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register LHSSrc1 = LHSPtrAdd->getBaseReg();
+ Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
+ auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
// We're trying to match a few pointer computation patterns here for
// re-association opportunities.
// 1) Isolating a constant operand to be on the RHS, e.g.:
@@ -4209,49 +4373,26 @@ bool CombinerHelper::matchReassocPtrAdd(
// 2) Folding two constants in each sub-tree as long as such folding
// doesn't break a legal addressing mode.
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
- MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
-
- if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
- // Try to match example 1).
- if (RHS->getOpcode() != TargetOpcode::G_ADD)
- return false;
- auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
- if (!C2)
- return false;
+ //
+ // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
+ // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
+ // iff (G_PTR_ADD X, C) has one use.
+ MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
+ MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
+
+ // Try to match example 2.
+ if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- MatchInfo = [=,&MI](MachineIRBuilder &B) {
- LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+ // Try to match example 3.
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NewBase.getReg(0));
- MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
- Observer.changedInstr(MI);
- };
- } else {
- // Try to match example 2.
- Register LHSSrc1 = LHS->getOperand(1).getReg();
- Register LHSSrc2 = LHS->getOperand(2).getReg();
- auto C1 = getConstantVRegVal(LHSSrc2, MRI);
- if (!C1)
- return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
- if (!C2)
- return false;
+ // Try to match example 1.
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
+ return true;
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(LHSSrc1);
- MI.getOperand(2).setReg(NewCst.getReg(0));
- Observer.changedInstr(MI);
- };
- }
- return !reassociationCanBreakAddressingModePattern(MI);
+ return false;
}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
@@ -4264,6 +4405,361 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Look for a binop feeding into an AND with a mask:
+ //
+ // %add = G_ADD %lhs, %rhs
+ // %and = G_AND %add, 000...11111111
+ //
+ // Check if it's possible to perform the binop at a narrower width and zext
+ // back to the original width like so:
+ //
+ // %narrow_lhs = G_TRUNC %lhs
+ // %narrow_rhs = G_TRUNC %rhs
+ // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
+ // %new_add = G_ZEXT %narrow_add
+ // %and = G_AND %new_add, 000...11111111
+ //
+ // This can allow later combines to eliminate the G_AND if it turns out
+ // that the mask is irrelevant.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ Register AndLHS = MI.getOperand(1).getReg();
+ Register AndRHS = MI.getOperand(2).getReg();
+ LLT WideTy = MRI.getType(Dst);
+
+ // If the potential binop has more than one use, then it's possible that one
+ // of those uses will need its full width.
+ if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
+ return false;
+
+ // Check if the LHS feeding the AND is impacted by the high bits that we're
+ // masking out.
+ //
+ // e.g. for 64-bit x, y:
+ //
+ // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
+ MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
+ if (!LHSInst)
+ return false;
+ unsigned LHSOpc = LHSInst->getOpcode();
+ switch (LHSOpc) {
+ default:
+ return false;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ break;
+ }
+
+ // Find the mask on the RHS.
+ auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
+ if (!Cst)
+ return false;
+ auto Mask = Cst->Value;
+ if (!Mask.isMask())
+ return false;
+
+ // No point in combining if there's nothing to truncate.
+ unsigned NarrowWidth = Mask.countTrailingOnes();
+ if (NarrowWidth == WideTy.getSizeInBits())
+ return false;
+ LLT NarrowTy = LLT::scalar(NarrowWidth);
+
+ // Check if adding the zext + truncates could be harmful.
+ auto &MF = *MI.getMF();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
+ !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
+ return false;
+ Register BinOpLHS = LHSInst->getOperand(1).getReg();
+ Register BinOpRHS = LHSInst->getOperand(2).getReg();
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
+ auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
+ auto NarrowBinOp =
+ Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
+ auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Ext.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
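// Illustrative sketch, not part of the patch: the identity behind the
// narrowing, for the G_ADD case with a 16-bit mask. The low bits of a sum
// depend only on the low bits of its operands, so the 64-bit add can be done
// at 16 bits and zero-extended before the AND.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 0x123456789ABCDEF0ull, Y = 0x0FEDCBA987654321ull;
  const uint64_t Wide = (X + Y) & 0xFFFFull;
  const uint64_t Narrow = uint64_t(uint16_t(uint16_t(X) + uint16_t(Y)));
  assert(Wide == Narrow);
  return 0;
}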
+
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+ // Check for a constant 2 or a splat of 2 on the RHS.
+ auto RHS = MI.getOperand(3).getReg();
+ bool IsVector = MRI.getType(RHS).isVector();
+ if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
+ return false;
+ if (IsVector) {
+ // FIXME: There's no mi_match pattern for this yet.
+ auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+ if (!RHSDef)
+ return false;
+ auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+ if (!Splat || *Splat != 2)
+ return false;
+ }
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+ : TargetOpcode::G_SADDO;
+ MI.setDesc(Builder.getTII().get(NewOpc));
+ MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
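// Illustrative sketch, not part of the patch: x * 2 and x + x agree on both
// the result and the overflow flag, which is what justifies rewriting
// G_UMULO/G_SMULO by 2 as G_UADDO/G_SADDO. __builtin_mul_overflow and
// __builtin_add_overflow are GCC/Clang builtins used here only to model the
// overflow outputs.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) {
    uint32_t MulRes, AddRes;
    const bool MulOv = __builtin_mul_overflow(X, 2u, &MulRes);
    const bool AddOv = __builtin_add_overflow(X, X, &AddRes);
    assert(MulRes == AddRes && MulOv == AddOv);
  }
  return 0;
}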
+
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseNPQ = false;
+ SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](const Constant *C) {
+ auto *CI = cast<ConstantInt>(C);
+ const APInt &Divisor = CI->getValue();
+ UnsignedDivisonByConstantInfo magics =
+ UnsignedDivisonByConstantInfo::get(Divisor);
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.IsAdd != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics =
+ UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ }
+
+ APInt Magic = magics.Magic;
+
+ unsigned SelNPQ;
+ if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.ShiftAmount;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.ShiftAmount - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ NPQFactors.push_back(
+ MIB.buildConstant(ScalarTy,
+ SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits))
+ .getReg(0));
+ PostShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register PreShift, PostShift, MagicFactor, NPQFactor;
+ auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+ if (RHSDef) {
+ PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+ MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+ NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+ PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+ } else {
+ assert(MRI.getType(RHS).isScalar() &&
+ "Non-build_vector operation should have been a scalar");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ Register Q = LHS;
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+ if (UseNPQ) {
+ Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (Ty.isVector())
+ NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+ else
+ NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+ Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+ }
+
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ auto One = MIB.buildConstant(Ty, 1);
+ auto IsOne = MIB.buildICmp(
+ CmpInst::Predicate::ICMP_EQ,
+ Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+ return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
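// Illustrative sketch, not part of the patch: one well-known instance of the
// expansion built above. For a 32-bit unsigned divide by 10, the published
// magic factor is 0xCCCCCCCD with a post-shift of 3 and no NPQ fixup, so the
// division reduces to a multiply-high and a shift.
#include <cassert>
#include <cstdint>

static uint32_t UDiv10(uint32_t N) {
  const uint32_t Hi = uint32_t((uint64_t(N) * 0xCCCCCCCDull) >> 32); // G_UMULH
  return Hi >> 3;                                                    // post-shift
}

int main() {
  for (uint32_t N : {0u, 9u, 10u, 12345u, 0xFFFFFFFFu})
    assert(UDiv10(N) == N / 10);
  return 0;
}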
+
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ }
+
+ auto CheckEltValue = [&](const Constant *C) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+ return !CI->isZero();
+ return false;
+ };
+ return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildUDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UMULH);
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ auto MatchPow2ExceptOne = [&](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
+ return false;
+ };
+ if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
+ return false;
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
+}
+
+void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ unsigned NumEltBits = Ty.getScalarSizeInBits();
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto LogBase2 = buildLogBase2(RHS, Builder);
+ auto ShiftAmt =
+ Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
+ auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
+ Builder.buildLShr(Dst, LHS, Trunc);
+ MI.eraseFromParent();
+}
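// Illustrative sketch, not part of the patch: the high half of x * 2^k is
// x >> (BitWidth - k), which is exactly the shift amount the apply function
// computes as NumEltBits minus log2 of the power-of-2 operand.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned BitWidth = 32, K = 3; // RHS = 8 == 1 << 3
  for (uint32_t X : {0u, 0x12345678u, 0xFFFFFFFFu}) {
    const uint32_t UMulH = uint32_t((uint64_t(X) * (1u << K)) >> 32);
    assert(UMulH == (X >> (BitWidth - K)));
  }
  return 0;
}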
+
+bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
+ Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ LLT Type = MRI.getType(Dst);
+
+ // fold (fadd x, fneg(y)) -> (fsub x, y)
+ // fold (fadd fneg(y), x) -> (fsub x, y)
+ // G_FADD is commutative, so both cases are checked by m_GFAdd.
+ if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
+ Opc = TargetOpcode::G_FSUB;
+ }
+ // fold (fsub x, fneg(y)) -> (fadd x, y)
+ else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
+ Opc = TargetOpcode::G_FADD;
+ }
+ // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
+ mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
+ mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
+ // no opcode change
+ } else
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(Opc));
+ MI.getOperand(1).setReg(X);
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 8146a67d4dfb..306af808659a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -9,7 +9,7 @@
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
-//===------------------
+//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -57,7 +57,7 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
KnownBits GISelKnownBits::getKnownBits(Register R) {
const LLT Ty = MRI.getType(R);
APInt DemandedElts =
- Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return getKnownBits(R, DemandedElts);
}
@@ -198,8 +198,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
- Known.One = APInt::getAllOnesValue(BitWidth);
- Known.Zero = APInt::getAllOnesValue(BitWidth);
+ Known.One = APInt::getAllOnes(BitWidth);
+ Known.Zero = APInt::getAllOnes(BitWidth);
// Destination registers should not have subregisters at this
// point of the pipeline, otherwise the main live-range will be
// defined more than once, which is against SSA.
@@ -245,7 +245,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CONSTANT: {
- auto CstVal = getConstantVRegVal(R, MRI);
+ auto CstVal = getIConstantVRegVal(R, MRI);
if (!CstVal)
break;
Known = KnownBits::makeConstant(*CstVal);
@@ -510,6 +510,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.reverseBits();
break;
}
+ case TargetOpcode::G_CTPOP: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // We can bound the space the count needs. Also, bits known to be zero can't
+ // contribute to the population.
+ unsigned BitsPossiblySet = Known2.countMaxPopulation();
+ unsigned LowBits = Log2_32(BitsPossiblySet)+1;
+ Known.Zero.setBitsFrom(LowBits);
+ // TODO: we could bound Known.One using the lower bound on the number of set
+ // bits in the operand, i.e. the population count of Known2.One.
+ break;
+ }
case TargetOpcode::G_UBFX: {
KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
@@ -676,9 +688,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
LLT Ty = MRI.getType(R);
- APInt DemandedElts = Ty.isVector()
- ? APInt::getAllOnesValue(Ty.getNumElements())
- : APInt(1, 1);
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return computeNumSignBits(R, DemandedElts, Depth);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index e0391e6f6467..252b931602c6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -18,6 +18,7 @@ using namespace llvm;
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
initializeLegalizerPass(Registry);
+ initializeLoadStoreOptPass(Registry);
initializeLocalizerPass(Registry);
initializeRegBankSelectPass(Registry);
initializeInstructionSelectPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 73b763710fdf..87cc60d51bc2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,6 +33,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -47,6 +49,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -114,7 +117,7 @@ static void reportTranslationError(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (TPC.isGlobalISelAbortEnabled())
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
else
ORE.emit(R);
}
@@ -566,7 +569,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
if (BrInst.isUnconditional()) {
// If the unconditional target is the layout successor, fallthrough.
- if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+ if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
MIRBuilder.buildBr(*Succ0MBB);
// Link successors.
@@ -739,8 +742,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
// FIXME: At the moment we don't do any splitting optimizations here like
// SelectionDAG does, so this worklist only has one entry.
while (!WorkList.empty()) {
- SwitchWorkListItem W = WorkList.back();
- WorkList.pop_back();
+ SwitchWorkListItem W = WorkList.pop_back_val();
if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
return false;
}
@@ -784,7 +786,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
JT.Reg = Sub.getReg(0);
- if (JTH.OmitRangeCheck) {
+ if (JTH.FallthroughUnreachable) {
if (JT.MBB != HeaderBB->getNextNode())
MIB.buildBr(*JT.MBB);
return true;
@@ -936,11 +938,10 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
}
}
- // Skip the range check if the fallthrough block is unreachable.
if (FallthroughUnreachable)
- JTH->OmitRangeCheck = true;
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -1004,14 +1005,22 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
- // Ensure that the type will fit the mask value.
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
LLT MaskTy = SwitchOpTy;
- for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
- if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
- // Switch table case range are encoded into series of masks.
- // Just use pointer type, it's guaranteed to fit.
- MaskTy = LLT::scalar(64);
- break;
+ if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
+ !isPowerOf2_32(MaskTy.getSizeInBits()))
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ else {
+ // Ensure that the type will fit the mask value.
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+ // Switch table case range are encoded into series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ break;
+ }
}
}
Register SubReg = RangeSub.getReg(0);
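Note: the MaskTy clamp above matters because emitBitTestCase folds a cluster of switch cases into a single mask test on (SwitchOp - First); if the mask type cannot hold one bit per value in the covered range, the encoding breaks. A minimal standalone sketch of that encoding (plain C++, illustrative only — the case values 2, 4 and 7 are made up, and this is not the LLVM API):

#include <cstdint>
#include <cstdio>

// Cases {2, 4, 7} of a hypothetical switch collapse into one mask test on
// Sub = X - First; values below First wrap to a large Sub and fail the
// range check, mirroring the unsigned compare against B.Range above.
static bool hitsAnyCase(uint64_t X) {
  const uint64_t First = 2, Range = 7 - 2;
  const uint64_t Mask = (1ull << (2 - First)) | (1ull << (4 - First)) |
                        (1ull << (7 - First));
  uint64_t Sub = X - First;          // RangeSub in the code above
  if (Sub > Range)                   // range check (skipped if unreachable)
    return false;
  return (Mask >> Sub) & 1;          // the per-cluster bit test
}

int main() {
  for (uint64_t X = 0; X <= 9; ++X)
    printf("%llu -> %d\n", (unsigned long long)X, (int)hitsAnyCase(X));
  return 0;
}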
@@ -1023,13 +1032,13 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
MachineBasicBlock *MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
@@ -1129,10 +1138,8 @@ bool IRTranslator::lowerBitTestWorkItem(
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
@@ -1297,11 +1304,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
Align BaseAlign = getMemOpAlign(LI);
- AAMDNodes AAMetadata;
- LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Regs[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
+ commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -1339,11 +1344,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
Align BaseAlign = getMemOpAlign(SI);
- AAMDNodes AAMetadata;
- SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Vals[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
+ commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
}
@@ -1590,8 +1593,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
Align DstAlign;
Align SrcAlign;
unsigned IsVol =
- cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
- ->getZExtValue();
+ cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
@@ -1763,6 +1765,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_UMAX;
case Intrinsic::vector_reduce_umin:
return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::lround:
+ return TargetOpcode::G_LROUND;
+ case Intrinsic::llround:
+ return TargetOpcode::G_LLROUND;
}
return Intrinsic::not_intrinsic;
}
@@ -1779,7 +1785,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
// Yes. Let's translate it.
SmallVector<llvm::SrcOp, 4> VRegs;
- for (auto &Arg : CI.arg_operands())
+ for (auto &Arg : CI.args())
VRegs.push_back(getOrCreateVReg(*Arg));
MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
@@ -2172,7 +2178,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {
Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -2228,6 +2234,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::trap:
+ case Intrinsic::debugtrap:
+ case Intrinsic::ubsantrap: {
+ StringRef TrapFuncName =
+ CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
+ if (TrapFuncName.empty())
+ break; // Use the default handling.
+ CallLowering::CallLoweringInfo Info;
+ if (ID == Intrinsic::ubsantrap) {
+ Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
+ CI.getArgOperand(0)->getType(), 0});
+ }
+ Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
+ Info.CB = &CI;
+ Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
+ return CLI->lowerCall(MIRBuilder, Info);
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2321,6 +2344,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
+ diagnoseDontCall(CI);
+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (F && F->isIntrinsic()) {
ID = F->getIntrinsicID();
@@ -2347,7 +2372,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
- for (auto &Arg : enumerate(CI.arg_operands())) {
+ for (auto &Arg : enumerate(CI.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
@@ -2360,10 +2385,15 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
- } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
- auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
- if (!MDN) // This was probably an MDString.
- return false;
+ } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MD = MDVal->getMetadata();
+ auto *MDN = dyn_cast<MDNode>(MD);
+ if (!MDN) {
+ if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD))
+ MDN = MDNode::get(MF->getFunction().getContext(), ConstMD);
+ else // This was probably an MDString.
+ return false;
+ }
MIB.addMetadata(MDN);
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
@@ -2472,32 +2502,19 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
- bool LowerInlineAsm = false;
- if (I.isInlineAsm()) {
- const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand());
- if (!IA->canThrow()) {
- // Fast path without emitting EH_LABELs.
-
- if (!translateInlineAsm(I, MIRBuilder))
- return false;
-
- MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(),
- *ReturnMBB = &getMBB(*ReturnBB);
-
- // Update successor info.
- addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne());
-
- MIRBuilder.buildBr(*ReturnMBB);
- return true;
- } else {
- LowerInlineAsm = true;
- }
- }
+ bool LowerInlineAsm = I.isInlineAsm();
+ bool NeedEHLabel = true;
+ // If it can't throw then use a fast-path without emitting EH labels.
+ if (LowerInlineAsm)
+ NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow();
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
- MCSymbol *BeginSymbol = Context.createTempSymbol();
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ MCSymbol *BeginSymbol = nullptr;
+ if (NeedEHLabel) {
+ BeginSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ }
if (LowerInlineAsm) {
if (!translateInlineAsm(I, MIRBuilder))
@@ -2505,8 +2522,11 @@ bool IRTranslator::translateInvoke(const User &U,
} else if (!translateCallBase(I, MIRBuilder))
return false;
- MCSymbol *EndSymbol = Context.createTempSymbol();
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ MCSymbol *EndSymbol = nullptr;
+ if (NeedEHLabel) {
+ EndSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ }
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -2528,7 +2548,12 @@ bool IRTranslator::translateInvoke(const User &U,
}
InvokeMBB->normalizeSuccProbs();
- MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ if (NeedEHLabel) {
+ assert(BeginSymbol && "Expected a begin symbol!");
+ assert(EndSymbol && "Expected an end symbol!");
+ MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ }
+
MIRBuilder.buildBr(ReturnMBB);
return true;
}
@@ -2670,6 +2695,28 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
+ if (!MF->getTarget().Options.TrapUnreachable)
+ return true;
+
+ auto &UI = cast<UnreachableInst>(U);
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (MF->getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *UI.getParent();
+ if (&UI != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(UI));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return true;
+ }
+ }
+ }
+
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ return true;
+}
+
bool IRTranslator::translateInsertElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
@@ -2757,14 +2804,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Register Cmp = getOrCreateVReg(*I.getCompareOperand());
Register NewVal = getOrCreateVReg(*I.getNewValOperand());
- AAMDNodes AAMetadata;
- I.getAAMetadata(AAMetadata);
-
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
*MF->getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp),
- getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(),
+ getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(),
I.getSuccessOrdering(), I.getFailureOrdering()));
return true;
}
@@ -2824,14 +2868,11 @@ bool IRTranslator::translateAtomicRMW(const User &U,
break;
}
- AAMDNodes AAMetadata;
- I.getAAMetadata(AAMetadata);
-
MIRBuilder.buildAtomicRMW(
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, MRI->getType(Val), getMemOpAlign(I),
- AAMetadata, nullptr, I.getSyncScopeID(),
+ I.getAAMetadata(), nullptr, I.getSyncScopeID(),
I.getOrdering()));
return true;
}
@@ -2985,7 +3026,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
return true;
}
-void IRTranslator::finalizeBasicBlock() {
+bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
+ MachineBasicBlock &MBB) {
for (auto &BTB : SL->BitTestCases) {
// Emit header first, if it wasn't already emitted.
if (!BTB.Emitted)
@@ -3005,7 +3047,7 @@ void IRTranslator::finalizeBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Second-to-last bit-test with contiguous range: fall through to the
// target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
@@ -3019,7 +3061,7 @@ void IRTranslator::finalizeBasicBlock() {
emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// We need to record the replacement phi edge here that normally
// happens in emitBitTestCase before we delete the case, otherwise the
// phi edge will be lost.
@@ -3054,6 +3096,176 @@ void IRTranslator::finalizeBasicBlock() {
for (auto &SwCase : SL->SwitchCases)
emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
SL->SwitchCases.clear();
+
+ // Check if we need to generate stack-protector guard checks.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ if (SP.shouldEmitSDCheck(BB)) {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ bool FunctionBasedInstrumentation =
+ TLI.getSSPStackGuardCheck(*MF->getFunction().getParent());
+ SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation);
+ }
+ // Handle stack protector.
+ if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
+ return false;
+ } else if (SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This prevents us needing to deal with Live-ins and many other
+ // register allocation issues caused by us splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector(
+ ParentMBB, *MF->getSubtarget().getInstrInfo());
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ if (!emitSPDescriptorParent(SPDescriptor, ParentMBB))
+ return false;
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB))
+ return false;
+ }
+
+ // Clear the Per-BB State.
+ SPDescriptor.resetPerBBState();
+ }
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+ CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
+
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
+
+ Register Guard;
+ Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
+ Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ Register GuardVal =
+ CurBuilder
+ ->buildLoad(PtrMemTy, StackSlotPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile)
+ .getReg(0);
+
+ if (TLI.useStackGuardXorFP()) {
+ LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
+ return false;
+ }
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ // This path is currently untestable on GlobalISel, since the only platform
+ // that needs this seems to be Windows, and we fall back on that currently.
+ // The code still lives here in case that changes.
+ // Silence warning about unused variable until the code below that uses
+ // 'GuardCheckFn' is enabled.
+ (void)GuardCheckFn;
+ return false;
+#if 0
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+ ISD::ArgFlagsTy Flags;
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Flags.setInReg();
+ CallLowering::ArgInfo GuardArgInfo(
+ {GuardVal, FnTy->getParamType(0), {Flags}});
+
+ CallLowering::CallLoweringInfo Info;
+ Info.OrigArgs.push_back(GuardArgInfo);
+ Info.CallConv = GuardCheckFn->getCallingConv();
+ Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0);
+ Info.OrigRet = {Register(), FnTy->getReturnType()};
+ if (!CLI->lowerCall(MIRBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
+ return false;
+ }
+ return true;
+#endif
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ if (TLI.useLoadStackGuardNode()) {
+ Guard =
+ MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
+ getStackGuard(Guard, *CurBuilder);
+ } else {
+ // TODO: test using android subtarget when we support @llvm.thread.pointer.
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ Register GuardPtr = getOrCreateVReg(*IRGuard);
+
+ Guard = CurBuilder
+ ->buildLoad(PtrMemTy, GuardPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile)
+ .getReg(0);
+ }
+
+ // Perform the comparison.
+ auto Cmp =
+ CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
+ // If the guard/stackslot do not equal, branch to failure MBB.
+ CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
+ // Otherwise branch to success MBB.
+ CurBuilder->buildBr(*SPD.getSuccessMBB());
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *FailureBB) {
+ CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+
+ const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
+ const char *Name = TLI.getLibcallName(Libcall);
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
+ 0};
+ if (!CLI->lowerCall(*CurBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
+ return false;
+ }
+
+ // On PS4, the "return address" must still be within the calling function,
+ // even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ const TargetMachine &TM = MF->getTarget();
+ if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) {
+ LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
+ return false;
+ }
+ return true;
}
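Note: taken together, emitSPDescriptorParent and emitSPDescriptorFailure emit the moral equivalent of the sketch below: reload the guard and its on-stack copy with volatile loads, compare them, and branch to a block that calls the stack-check-fail libcall. This is only a conceptual illustration; the names and the plain-C control flow are stand-ins, not the MIR the translator actually builds.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Stand-in for the stack guard value (normally read via LOAD_STACK_GUARD or
// a volatile load of the target's guard variable).
static volatile uintptr_t stack_guard = 0x5a5a5a5aU;

static void stack_chk_fail(void) {          // FailureMBB: the libcall target
  fprintf(stderr, "stack smashing detected\n");
  abort();
}

static void epilogue_check(uintptr_t guard_slot) {  // ParentMBB
  if (guard_slot != stack_guard)            // G_ICMP NE + conditional branch
    stack_chk_fail();
  // SuccessMBB: fall through to the spliced-off tail of the parent block.
}

int main(void) {
  epilogue_check(stack_guard);              // guard intact: no failure path
  puts("guard intact");
  return 0;
}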
void IRTranslator::finalizeFunction() {
@@ -3069,6 +3281,7 @@ void IRTranslator::finalizeFunction() {
EntryBuilder.reset();
CurBuilder.reset();
FuncInfo.clear();
+ SPDescriptor.resetPerFunctionState();
}
/// Returns true if a BasicBlock \p BB within a variadic function contains a
@@ -3079,7 +3292,7 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
// Walk the block backwards, because tail calls usually only appear at the end
// of a block.
- return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
+ return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {
const auto *CI = dyn_cast<CallInst>(&I);
return CI && CI->isMustTailCall();
});
@@ -3088,8 +3301,6 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF = &CurMF;
const Function &F = MF->getFunction();
- if (F.empty())
- return false;
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
// Set the CSEConfig and run the analysis.
@@ -3257,7 +3468,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
- finalizeBasicBlock();
+ if (!finalizeBasicBlock(*BB, MBB))
+ return false;
}
#ifndef NDEBUG
WrapperObserver.removeObserver(&Verifier);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 75a8f03fcb3f..9b2692486384 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -30,9 +30,9 @@
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "instruction-select"
@@ -130,9 +130,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
#endif
+ // Keep track of selected blocks, so we can delete unreachable ones later.
+ DenseSet<MachineBasicBlock *> SelectedBlocks;
for (MachineBasicBlock *MBB : post_order(&MF)) {
ISel->CurMBB = MBB;
+ SelectedBlocks.insert(MBB);
if (MBB->empty())
continue;
@@ -205,6 +208,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
if (MBB.empty())
continue;
+ if (!SelectedBlocks.contains(&MBB)) {
+ // This is an unreachable block and therefore hasn't been selected, since
+ // the main selection loop above uses a postorder block traversal.
+ // We delete all the instructions in this block since it's unreachable.
+ MBB.clear();
+ // Don't delete the block in case the block has its address taken or is
+ // still being referenced by a phi somewhere.
+ continue;
+ }
// Try to find redundant copies b/w vregs of the same register class.
bool ReachedBegin = false;
for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 4fec9e628ddb..dc5a4d8f85aa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -37,7 +37,7 @@ bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
return VRegVal->Value.getSExtValue() == Value;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 7c5e4e52ca3e..1f0738a8d9d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -153,6 +153,14 @@ LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
};
}
+LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() % Size != 0;
+ };
+}
+
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index fc2570ae4b8e..75b7fcb5663a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -63,6 +63,16 @@ LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
};
}
+LegalizeMutation
+LegalizeMutations::widenScalarOrEltToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits = alignTo(Ty.getScalarSizeInBits(), Size);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
+ };
+}
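Note: this mutation is the natural partner of the sizeNotMultipleOf predicate added in LegalityPredicates.cpp above — a target rule set can widen any odd-sized scalar up to the next multiple of some granule. The rounding itself is just alignTo; a small standalone check of the arithmetic (plain C++, no LLVM dependencies):

#include <cstdio>

// Mirrors alignTo(Ty.getScalarSizeInBits(), Size): round a bit width up to
// the next multiple of Size.
static unsigned alignToMultiple(unsigned Bits, unsigned Size) {
  return (Bits + Size - 1) / Size * Size;
}

int main() {
  // e.g. widening to the next multiple of 32 bits.
  printf("s33 -> s%u\n", alignToMultiple(33, 32));  // s64
  printf("s48 -> s%u\n", alignToMultiple(48, 32));  // s64
  printf("s64 -> s%u\n", alignToMultiple(64, 32));  // s64 (already a multiple)
  return 0;
}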
+
LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 635b1445ee07..0ab4a7f64840 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,9 +218,6 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
- auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
- WrapperObserver.erasingInstr(*DeadMI);
- };
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
@@ -232,9 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
- LocObserver.checkpoint(false);
+ eraseInstr(MI, MRI, &LocObserver);
continue;
}
@@ -281,10 +276,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead\n");
- RemoveDeadInstFromLists(&MI);
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
- LocObserver.checkpoint(false);
+ eraseInstr(MI, MRI, &LocObserver);
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
@@ -292,11 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
- for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
- RemoveDeadInstFromLists(DeadMI);
- DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
- }
+ eraseInstrs(DeadInstructions, MRI, &LocObserver);
LocObserver.checkpoint(
VerifyDebugLocs ==
DebugLocVerifyLevel::LegalizationsAndArtifactCombiners);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c1e0d2549c42..c74bec7dfc0d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "legalizer"
@@ -497,8 +498,8 @@ static bool isLibCallInTailPosition(MachineInstr &MI,
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
+ CallerAttrs.hasRetAttr(Attribute::SExt))
return false;
// Only tail call if the following instruction is a standard return or if we
@@ -2051,10 +2052,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Register SrcReg = MI.getOperand(1).getReg();
- // First ZEXT the input.
- auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+ // First extend the input.
+ unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
+ MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
+ ? TargetOpcode::G_ANYEXT
+ : TargetOpcode::G_ZEXT;
+ auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
LLT CurTy = MRI.getType(SrcReg);
- if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ unsigned NewOpc = MI.getOpcode();
+ if (NewOpc == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
@@ -2062,10 +2068,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
MIBSrc = MIRBuilder.buildOr(
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+ // Now we know the operand is non-zero, use the more relaxed opcode.
+ NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
// Perform the operation at the larger size.
- auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
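Note: the G_CTTZ handling above any-extends the source and ORs in a single bit just past the original width, which both preserves the trailing-zero count and makes the operand provably non-zero (hence the switch to G_CTTZ_ZERO_UNDEF). A standalone check of that property for an 8-bit source widened to 32 bits (illustrative C++, not the LLVM API):

#include <cstdio>

// Portable count-trailing-zeros for a non-zero value.
static unsigned ctz(unsigned x) {
  unsigned n = 0;
  while ((x & 1u) == 0) { x >>= 1; ++n; }
  return n;
}

int main() {
  bool ok = true;
  for (unsigned v = 0; v < 256; ++v) {
    unsigned wide = v | (1u << 8);         // OR in a bit just past bit 7
    unsigned narrow = v ? ctz(v) : 8;      // cttz of an s8 (zero -> width)
    ok &= (narrow == ctz(wide));           // wide operand is never zero
  }
  puts(ok ? "cttz widening preserved for all s8 inputs" : "mismatch");
  return 0;
}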
@@ -2427,7 +2435,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(
MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
- TargetOpcode::G_SEXT);
+ TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
@@ -2662,7 +2670,7 @@ static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
// Now figure out the amount we need to shift to get the target bits.
auto OffsetMask = B.buildConstant(
- IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
+ IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
return B.buildShl(IdxTy, OffsetIdx,
B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
@@ -2886,13 +2894,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
MachineMemOperand &MMO = LoadMI.getMMO();
LLT MemTy = MMO.getMemoryType();
MachineFunction &MF = MIRBuilder.getMF();
- if (MemTy.isVector())
- return UnableToLegalize;
unsigned MemSizeInBits = MemTy.getSizeInBits();
unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
if (MemSizeInBits != MemStoreSizeInBits) {
+ if (MemTy.isVector())
+ return UnableToLegalize;
+
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
@@ -2928,16 +2937,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
return Legalized;
}
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(MemSizeInBits))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
// Big endian lowering not implemented.
if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
+ // This load needs splitting into power of 2 sized loads.
+ //
// Our strategy here is to generate anyextending loads for the smaller
// types up to next power-2 result type, and then combine the two larger
// result values together, before truncating back down to the non-pow-2
@@ -2950,8 +2955,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// v1 = i24 trunc v5
// By doing this we generate the correct truncate which should get
// combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
- uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ // This load needs splitting into power of 2 sized loads.
+ LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ SmallSplitSize = MemSizeInBits - LargeSplitSize;
+ } else {
+ // This is already a power of 2, but we still need to split this in half.
+ //
+ // Assume we're being asked to decompose an unaligned load.
+ // TODO: If this requires multiple splits, handle them all at once.
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize;
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
+
+ if (MemTy.isVector()) {
+ // TODO: Handle vector extloads
+ if (MemTy != DstTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
+ }
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
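Note: for the concrete s24 case mentioned in the comments, the split produces an s16 load plus an s8 load at offset 2, recombined with a shift and an OR in the any-extended type before truncating back down. A host-side sketch of the same byte arithmetic (little-endian only, matching the isBigEndian() bail-out above; plain C++, not the generated MIR):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Loads a 3-byte little-endian value the way the split lowering does:
// s16 at offset 0, s8 at offset 2, then (small << 16) | large, truncated.
static uint32_t load_s24_split(const uint8_t *p) {
  uint16_t large;                                   // LargeSplitSize = 16 bits
  uint8_t small;                                    // SmallSplitSize = 8 bits
  memcpy(&large, p, sizeof large);
  memcpy(&small, p + sizeof large, sizeof small);
  uint32_t wide = ((uint32_t)small << 16) | large;  // shl + or in s32
  return wide & 0xFFFFFFu;                          // trunc back to s24
}

int main() {
  const uint8_t bytes[3] = {0xAA, 0xBB, 0xCC};
  printf("0x%06X\n", load_s24_split(bytes));        // 0xCCBBAA on LE hosts
  return 0;
}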
@@ -2976,9 +3007,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (AnyExtTy == DstTy)
MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
- else {
+ else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
+ } else {
+ assert(DstTy.isPointer() && "expected pointer");
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+
+ // FIXME: We currently consider this to be illegal for non-integral address
+ // spaces, but we still need a way to reinterpret the bits.
+ MIRBuilder.buildIntToPtr(DstReg, Or);
}
LoadMI.eraseFromParent();
@@ -2999,13 +3037,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
MachineMemOperand &MMO = **StoreMI.memoperands_begin();
LLT MemTy = MMO.getMemoryType();
- if (SrcTy.isVector())
- return UnableToLegalize;
-
unsigned StoreWidth = MemTy.getSizeInBits();
unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
if (StoreWidth != StoreSizeInBits) {
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
@@ -3026,18 +3064,44 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
return Legalized;
}
- if (isPowerOf2_32(MemTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
+ if (MemTy.isVector()) {
+ // TODO: Handle vector trunc stores
+ if (MemTy != SrcTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
+ }
+
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+ } else {
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
// Extend to the next pow-2. If this store was itself the result of lowering,
// e.g. an s56 store being broken into s32 + s24, we might have a stored type
- // that's wider the stored size.
- const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
+ // that's wider than the stored size.
+ unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
+ const LLT NewSrcTy = LLT::scalar(AnyExtSize);
+
+ if (SrcTy.isPointer()) {
+ const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
+ }
+
auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
// Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
- uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
@@ -3045,9 +3109,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
LLT PtrTy = MRI.getType(PtrReg);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -3424,6 +3487,14 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_ROTL:
case G_ROTR:
return lowerRotate(MI);
+ case G_MEMSET:
+ case G_MEMCPY:
+ case G_MEMMOVE:
+ return lowerMemCpyFamily(MI);
+ case G_MEMCPY_INLINE:
+ return lowerMemcpyInline(MI);
+ GISEL_VECREDUCE_CASES_NONSEQ
+ return lowerVectorReduction(MI);
}
}
@@ -4004,9 +4075,7 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
int64_t IdxVal;
- auto MaybeCst =
- getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
- /*HandleFConstants*/ false);
+ auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
if (MaybeCst) {
IdxVal = MaybeCst->Value.getSExtValue();
// Avoid out of bounds indexing the pieces.
@@ -4363,6 +4432,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXIMUM:
case G_FSHL:
case G_FSHR:
+ case G_ROTL:
+ case G_ROTR:
case G_FREEZE:
case G_SADDSAT:
case G_SSUBSAT:
@@ -4572,35 +4643,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
return Legalized;
}
-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
- MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- unsigned Opc = MI.getOpcode();
- assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
- "Sequential reductions not expected");
-
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- // The semantics of the normal non-sequential reductions allow us to freely
- // re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
- return UnableToLegalize;
-
- SmallVector<Register> SplitSrcs;
- const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
- extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
- SmallVector<Register> PartialReductions;
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- PartialReductions.push_back(
- MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
- }
-
+static unsigned getScalarOpcForReduction(unsigned Opc) {
unsigned ScalarOpc;
switch (Opc) {
case TargetOpcode::G_VECREDUCE_FADD:
@@ -4643,10 +4686,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
ScalarOpc = TargetOpcode::G_UMIN;
break;
default:
- LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
+ llvm_unreachable("Unhandled reduction");
+ }
+ return ScalarOpc;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
+ Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
+ "Sequential reductions not expected");
+
+ if (TypeIdx != 1)
return UnableToLegalize;
+
+ // The semantics of the normal non-sequential reductions allow us to freely
+ // re-associate the operation.
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ if (NarrowTy.isVector() &&
+ (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
+ return UnableToLegalize;
+
+ unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+ SmallVector<Register> SplitSrcs;
+ // If NarrowTy is a scalar then we're being asked to scalarize.
+ const unsigned NumParts =
+ NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
+ : SrcTy.getNumElements();
+
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ if (NarrowTy.isScalar()) {
+ if (DstTy != NarrowTy)
+ return UnableToLegalize; // FIXME: handle implicit extensions.
+
+ if (isPowerOf2_32(NumParts)) {
+ // Generate a tree of scalar operations to reduce the critical path.
+ SmallVector<Register> PartialResults;
+ unsigned NumPartsLeft = NumParts;
+ while (NumPartsLeft > 1) {
+ for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
+ PartialResults.emplace_back(
+ MIRBuilder
+ .buildInstr(ScalarOpc, {NarrowTy},
+ {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
+ .getReg(0));
+ }
+ SplitSrcs = PartialResults;
+ PartialResults.clear();
+ NumPartsLeft = SplitSrcs.size();
+ }
+ assert(SplitSrcs.size() == 1);
+ MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // If we can't generate a tree, then just do sequential operations.
+ Register Acc = SplitSrcs[0];
+ for (unsigned Idx = 1; Idx < NumParts; ++Idx)
+ Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
+ .getReg(0);
+ MIRBuilder.buildCopy(DstReg, Acc);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ SmallVector<Register> PartialReductions;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ PartialReductions.push_back(
+ MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
}
+
// If the types involved are powers of 2, we can generate intermediate vector
// ops, before generating a final reduction operation.
if (isPowerOf2_32(SrcTy.getNumElements()) &&
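Note: when NarrowTy is a scalar and the part count is a power of two, the code above builds a balanced tree of scalar ops rather than one long chain, halving the dependency depth. A quick standalone illustration of the shape (plain C++; the element values are arbitrary and an add reduction stands in for the generic opcode):

#include <cstdio>
#include <vector>

// Pairwise tree for an add reduction: each round combines adjacent parts,
// so 8 parts need 3 dependent levels instead of 7 sequential adds.
static int treeReduceAdd(std::vector<int> parts) {
  while (parts.size() > 1) {
    std::vector<int> next;
    for (size_t i = 0; i + 1 < parts.size(); i += 2)
      next.push_back(parts[i] + parts[i + 1]);  // one scalar op per pair
    parts = next;
  }
  return parts[0];
}

int main() {
  printf("%d\n", treeReduceAdd({1, 2, 3, 4, 5, 6, 7, 8}));  // 36
  return 0;
}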
@@ -4706,7 +4820,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
Register InH = MRI.createGenericVirtualRegister(HalfTy);
MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
- if (Amt.isNullValue()) {
+ if (Amt.isZero()) {
MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
MI.eraseFromParent();
return Legalized;
@@ -4815,10 +4929,9 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
const LLT HalfTy = LLT::scalar(NewBitSize);
const LLT CondTy = LLT::scalar(1);
- if (const MachineInstr *KShiftAmt =
- getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
- return narrowScalarShiftByConstant(
- MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+ if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
+ return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
+ ShiftAmtTy);
}
// TODO: Expand with known bits.
@@ -5224,26 +5337,23 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
if (Ty.isVector())
return UnableToLegalize;
- unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
- unsigned DstSize = Ty.getSizeInBits();
+ unsigned Size = Ty.getSizeInBits();
unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
+ if (Size % NarrowSize != 0)
return UnableToLegalize;
- unsigned NumDstParts = DstSize / NarrowSize;
- unsigned NumSrcParts = SrcSize / NarrowSize;
+ unsigned NumParts = Size / NarrowSize;
bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
- unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
+ unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
SmallVector<Register, 2> Src1Parts, Src2Parts;
SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
- extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
- extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+ extractParts(Src1, NarrowTy, NumParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumParts, Src2Parts);
multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// Take only high half of registers if this is high mul.
- ArrayRef<Register> DstRegs(
- IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
+ ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
@@ -5951,7 +6061,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
Register Src = MI.getOperand(1).getReg();
Register Amt = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
LLT AmtTy = MRI.getType(Amt);
unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
@@ -5965,6 +6075,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
isPowerOf2_32(EltSizeInBits))
return lowerRotateWithReverseRotate(MI);
+ // If a funnel shift is supported, use it.
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ bool IsFShLegal = false;
+ if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+ LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+ auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+ Register R3) {
+ MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+ MI.eraseFromParent();
+ return Legalized;
+ };
+ // If a funnel shift in the other direction is supported, use it.
+ if (IsFShLegal) {
+ return buildFunnelShift(FShOpc, Dst, Src, Amt);
+ } else if (isPowerOf2_32(EltSizeInBits)) {
+ Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+ return buildFunnelShift(RevFsh, Dst, Src, Amt);
+ }
+ }
+
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
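Note: the new funnel-shift path rests on two identities: rotl(x, a) == fshl(x, x, a), and, when only the opposite funnel shift is legal and the element width is a power of two, rotl(x, a) == fshr(x, x, -a). A standalone check of both for 32-bit values (plain C++, illustrative only):

#include <cstdint>
#include <cstdio>

static uint32_t fshl32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt &= 31;
  return amt ? (hi << amt) | (lo >> (32 - amt)) : hi;
}
static uint32_t fshr32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt &= 31;
  return amt ? (lo >> amt) | (hi << (32 - amt)) : lo;
}
static uint32_t rotl32(uint32_t x, uint32_t amt) {
  amt &= 31;
  return amt ? (x << amt) | (x >> (32 - amt)) : x;
}

int main() {
  const uint32_t x = 0x12345678u;
  bool ok = true;
  for (uint32_t a = 0; a < 64; ++a) {
    ok &= (rotl32(x, a) == fshl32(x, x, a));       // direct funnel shift
    ok &= (rotl32(x, a) == fshr32(x, x, 0u - a));  // reversed, negated amount
  }
  puts(ok ? "rotate/funnel-shift identities hold" : "mismatch");
  return 0;
}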
@@ -6150,7 +6281,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
: APFloat::IEEEdouble(),
- APInt::getNullValue(SrcTy.getSizeInBits()));
+ APInt::getZero(SrcTy.getSizeInBits()));
TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
@@ -7293,3 +7424,563 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(SrcReg);
+
+ // The source could be a scalar if the IR type was <1 x sN>.
+ if (SrcTy.isScalar()) {
+ if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
+ return UnableToLegalize; // FIXME: handle extension.
+ // This can be just a plain copy.
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
+// Returns a list of types to use for memory op lowering in MemOps. A partial
+// port of findOptimalMemOpLowering in TargetLowering.
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ const TargetLowering &TLI) {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+ return false;
+
+ LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
+
+ if (Ty == LLT()) {
+ // Use the largest scalar type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ Ty = LLT::scalar(64);
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
+ assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
+ // FIXME: check for the largest legal type we can load/store to.
+ }
+
+ unsigned NumMemOps = 0;
+ uint64_t Size = Op.size();
+ while (Size) {
+ unsigned TySize = Ty.getSizeInBytes();
+ while (TySize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ LLT NewTy = Ty;
+ // FIXME: check for mem op safety and legality of the types. Not all of
+ // SDAGisms map cleanly to GISel concepts.
+ if (NewTy.isVector())
+ NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
+ NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ unsigned NewTySize = NewTy.getSizeInBytes();
+ assert(NewTySize > 0 && "Could not find appropriate type");
+
+ // If the new LLT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+ // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
+ MVT VT = getMVTForLLT(Ty);
+ if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ TySize = Size;
+ else {
+ Ty = NewTy;
+ TySize = NewTySize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(Ty);
+ Size -= TySize;
+ }
+
+ return true;
+}
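Note: ignoring the misalignment and overlap refinements, the size loop above is a greedy split — start at the widest type the target likes and keep halving until a piece fits the remaining bytes. A compact sketch of just that loop (plain C++; the 8-byte starting width and 15-byte copy are made-up inputs, not what any particular target returns from getOptimalMemOpLLT):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t size = 15;              // remaining bytes of a hypothetical memcpy
  unsigned tyBytes = 8;            // start from an s64-sized piece
  while (size) {
    while (tyBytes > size)
      tyBytes /= 2;                // s64 -> s32 -> s16 -> s8
    printf("use s%u for %u bytes\n", tyBytes * 8, tyBytes);
    size -= tyBytes;
  }                                // prints s64, s32, s16, s8 for 15 bytes
  return 0;
}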
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+// Get a vectorized representation of the memset value operand, GISel edition.
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NumBits = Ty.getScalarSizeInBits();
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ if (!Ty.isVector() && ValVRegAndVal) {
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
+ APInt SplatVal = APInt::getSplat(NumBits, Scalar);
+ return MIB.buildConstant(Ty, SplatVal).getReg(0);
+ }
+
+ // Extend the byte value to the larger type, and then multiply by a magic
+ // value 0x010101... in order to replicate it across every byte.
+ // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
+ if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
+ return MIB.buildConstant(Ty, 0).getReg(0);
+ }
+
+ LLT ExtType = Ty.getScalarType();
+ auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
+ if (NumBits > 8) {
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ auto MagicMI = MIB.buildConstant(ExtType, Magic);
+ Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
+ }
+
+ // For vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+
+ return Val;
+}
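Note: the multiply step above is the classic byte-splat trick — zero-extend the s8 value and multiply by a constant with 0x01 in every byte of the wider type. A tiny standalone check (plain C++, not the MIR builder calls):

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t byteVal = 0xAB;                            // the memset value
  uint64_t magic = 0x0101010101010101ull;            // APInt::getSplat(64, 0x01)
  uint64_t splat = (uint64_t)byteVal * magic;        // G_ZEXT + G_MUL
  printf("0x%016llX\n", (unsigned long long)splat);  // 0xABABABABABABABAB
  return 0;
}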
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
+ uint64_t KnownLen, Align Alignment,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memset length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
+
+ if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+ MemOp::Set(KnownLen, DstAlignCanChange,
+ Alignment,
+ /*IsZeroMemset=*/IsZeroVal,
+ /*IsVolatile=*/IsVolatile),
+ DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ MachineIRBuilder MIB(MI);
+ // Find the largest store and generate the bit pattern for it.
+ LLT LargestTy = MemOps[0];
+ for (unsigned i = 1; i < MemOps.size(); i++)
+ if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
+ LargestTy = MemOps[i];
+
+ // The memset stored value is always defined as an s8, so in order to make it
+ // work with larger store types we need to repeat the bit pattern across the
+ // wider type.
+ Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
+
+ if (!MemSetValue)
+ return UnableToLegalize;
+
+ // Generate the stores. For each store type in the list, we generate the
+ // matching store of that type to the destination address.
+ LLT PtrTy = MRI.getType(Dst);
+ unsigned DstOff = 0;
+ unsigned Size = KnownLen;
+ for (unsigned I = 0; I < MemOps.size(); I++) {
+ LLT Ty = MemOps[I];
+ unsigned TySize = Ty.getSizeInBytes();
+ if (TySize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(I == MemOps.size() - 1 && I != 0);
+ DstOff -= TySize - Size;
+ }
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ Register Value = MemSetValue;
+ if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
+ MVT VT = getMVTForLLT(Ty);
+ MVT LargestVT = getMVTForLLT(LargestTy);
+ if (!LargestTy.isVector() && !Ty.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
+ else
+ Value = getMemsetValue(Val, Ty, MIB);
+ if (!Value)
+ return UnableToLegalize;
+ }
+
+ auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
+
+ Register Ptr = Dst;
+ if (DstOff != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
+ Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+
+ MIB.buildStore(Value, Ptr, *StoreMMO);
+ DstOff += Ty.getSizeInBytes();
+ Size -= TySize;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
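Note: the DstOff adjustment in the store loop implements the overlapping-tail idea — when the last chosen type is wider than the bytes left, pull the final store back so it ends exactly at the requested length instead of writing past it. A host-side sketch for an 11-byte memset done with two 8-byte stores (plain C++; the {s64, s64} type list is a made-up outcome of the earlier sizing step):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint8_t buf[16] = {0};
  const uint64_t splat = 0xABABABABABABABABull;
  unsigned dstOff = 0, size = 11;
  const unsigned tySize = 8;

  memcpy(buf + dstOff, &splat, tySize);   // first s64 store at offset 0
  dstOff += tySize;
  size -= tySize;                         // dstOff = 8, 3 bytes remain

  dstOff -= tySize - size;                // overlap: back up to offset 3
  memcpy(buf + dstOff, &splat, tySize);   // second s64 store ends at byte 11

  for (unsigned i = 0; i < sizeof buf; ++i)
    printf("%02X%c", buf[i], i + 1 == sizeof buf ? '\n' : ' ');
  return 0;                               // bytes 0..10 are AB, 11..15 are 00
}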
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
+ const auto *MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ // FIXME: support dynamically sized G_MEMCPY_INLINE
+ assert(LenVRegAndVal.hasValue() &&
+ "inline memcpy with dynamic size is not yet supported");
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ Align DstAlign = DstMMO.getBaseAlign();
+ Align SrcAlign = SrcMMO.getBaseAlign();
+
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+ return lowerMemcpy(MI, Dst, Src, KnownLen,
+ std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ // FIXME: infer better src pointer alignment like SelectionDAG does here.
+ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+ // if the memcpy is in a tail call position.
+
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ IsVolatile),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+  // Now we need to emit a load/store pair for each of the types we've
+  // collected. I.e. for each type, generate a load of that width from the
+  // source pointer, and then a corresponding store of the loaded value to the
+  // destination buffer. This can result in a sequence of loads and stores of
+  // mixed types, depending on what the target specifies as good types to use.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ unsigned Size = KnownLen;
+ for (auto CopyTy : MemOps) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ if (CopyTy.getSizeInBytes() > Size)
+ CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+ // Construct MMOs for the accesses.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ Register Offset;
+ if (CurrOffset != 0) {
+ Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ .getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ }
+ auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+ // Create the store.
+ Register StorePtr =
+ CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ Size -= CopyTy.getSizeInBytes();
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
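To make the tail-overlap adjustment concrete: when the last access type chosen by findGISelOptimalMemOpLowering is wider than the bytes that remain, its offset is pulled back so it overlaps the previous access rather than emitting a narrower tail. A standalone sketch of just the offset arithmetic (planAccesses is a hypothetical helper, not from the patch):

#include <cstdio>
#include <utility>
#include <vector>

// Walk the chosen access sizes; when an access is wider than the bytes that
// remain, pull its offset back so it overlaps the previous access.
static std::vector<std::pair<unsigned, unsigned>>
planAccesses(unsigned Len, const std::vector<unsigned> &Sizes) {
  std::vector<std::pair<unsigned, unsigned>> Plan;
  unsigned Off = 0, Remaining = Len;
  for (unsigned Sz : Sizes) {
    if (Sz > Remaining)
      Off -= Sz - Remaining; // overlap with the previous access
    Plan.push_back({Off, Sz});
    Off += Sz;
    Remaining = Sz > Remaining ? 0 : Remaining - Sz;
  }
  return Plan;
}

int main() {
  // A 15-byte copy lowered with two 8-byte accesses: [0,8) and then the
  // overlapping [7,15), instead of a separate 4+2+1 byte tail.
  for (auto [Off, Sz] : planAccesses(15, {8, 8}))
    std::printf("offset %u, size %u\n", Off, Sz);
  return 0;
}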
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memmove length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+  // to a bug in its findOptimalMemOpLowering implementation. For now do the
+ // same thing here.
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Memmove requires that we perform the loads first before issuing the stores.
+ // Apart from that, this loop is pretty much doing the same thing as the
+ // memcpy codegen function.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ SmallVector<Register, 16> LoadVals;
+ for (auto CopyTy : MemOps) {
+ // Construct MMO for the load.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ }
+ LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+
+ CurrOffset = 0;
+ for (unsigned I = 0; I < MemOps.size(); ++I) {
+ LLT CopyTy = MemOps[I];
+ // Now store the values loaded.
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
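A short illustration of why the memmove lowering above performs every load before any store: with overlapping source and destination, an interleaved sequence would re-read bytes that earlier stores already clobbered. Standalone sketch only, not GlobalISel code:

#include <cstdio>
#include <cstring>

int main() {
  char Buf[] = "abcdefgh";
  // Move Buf[0..5] to Buf[2..7]; source and destination overlap.
  char Loaded[6];
  std::memcpy(Loaded, Buf, 6);     // phase 1: perform every load
  std::memcpy(Buf + 2, Loaded, 6); // phase 2: perform every store
  std::printf("%s\n", Buf);        // prints "ababcdef"
  return 0;
}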
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
+ // This combine is fairly complex so it's not written with a separate
+ // matcher function.
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) &&
+ "Expected memcpy like instruction");
+
+ auto MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+
+ Align DstAlign = MemOp->getBaseAlign();
+ Align SrcAlign;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
+ if (Opc != TargetOpcode::G_MEMSET) {
+ assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
+ MemOp = *(++MMOIt);
+ SrcAlign = MemOp->getBaseAlign();
+ }
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ if (!LenVRegAndVal)
+ return UnableToLegalize;
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ bool IsVolatile = MemOp->isVolatile();
+ if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return UnableToLegalize;
+
+ if (MaxLen && KnownLen > MaxLen)
+ return UnableToLegalize;
+
+ if (Opc == TargetOpcode::G_MEMCPY) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
+ IsVolatile);
+ }
+ if (Opc == TargetOpcode::G_MEMMOVE)
+ return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (Opc == TargetOpcode::G_MEMSET)
+ return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
+ return UnableToLegalize;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 3e3141657e87..30697913a6a4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -352,8 +352,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
- MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(),
- MMO->getSuccessOrdering()});
+ MemDescrs.push_back({*MMO});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
new file mode 100644
index 000000000000..03dda806cb1e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -0,0 +1,669 @@
+//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the LoadStoreOpt optimization pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "loadstore-opt"
+
+using namespace llvm;
+using namespace ore;
+using namespace MIPatternMatch;
+
+STATISTIC(NumStoresMerged, "Number of stores merged");
+
+const unsigned MaxStoreSizeToForm = 128;
+
+char LoadStoreOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+
+LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+LoadStoreOpt::LoadStoreOpt()
+ : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
+
+void LoadStoreOpt::init(MachineFunction &MF) {
+ this->MF = &MF;
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TLI = MF.getSubtarget().getTargetLowering();
+ LI = MF.getSubtarget().getLegalizerInfo();
+ Builder.setMF(MF);
+ IsPreLegalizer = !MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ InstsToErase.clear();
+}
+
+void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
+ MachineRegisterInfo &MRI) {
+ BaseIndexOffset Info;
+ Register PtrAddRHS;
+ if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
+ Info.BaseReg = Ptr;
+ Info.IndexReg = Register();
+ Info.IsIndexSignExt = false;
+ return Info;
+ }
+
+ auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
+ if (RHSCst)
+ Info.Offset = RHSCst->Value.getSExtValue();
+
+  // Just recognize a simple case for now. In the future we'll need to match
+ // indexing patterns for base + index + constant.
+ Info.IndexReg = PtrAddRHS;
+ Info.IsIndexSignExt = false;
+ return Info;
+}
+
+bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
+ const MachineInstr &MI2,
+ bool &IsAlias,
+ MachineRegisterInfo &MRI) {
+ auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
+ auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
+ if (!LdSt1 || !LdSt2)
+ return false;
+
+ BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
+ BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
+
+ if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+ return false;
+
+ int64_t Size1 = LdSt1->getMemSize();
+ int64_t Size2 = LdSt2->getMemSize();
+
+ int64_t PtrDiff;
+ if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
+ PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+    // If the size of a memory access is unknown, do not use it in the
+    // analysis. One example of an unknown-size memory access is a load/store
+    // of a scalable vector object on the stack.
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ if (PtrDiff >= 0 &&
+ Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(Size1 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + Size2) <= 0);
+ return true;
+ }
+ return false;
+ }
+
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
+ auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+ if (!Base0Def || !Base1Def)
+ return false; // Couldn't tell anything.
+
+ if (Base0Def->getOpcode() != Base1Def->getOpcode())
+ return false;
+
+ if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
+ // If the bases have the same frame index but we couldn't find a
+ // constant offset, (indices are different) be conservative.
+ if (Base0Def != Base1Def &&
+ (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
+ !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // This implementation is a lot more primitive than the SDAG one for now.
+ // FIXME: what about constant pools?
+ if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
+ auto GV0 = Base0Def->getOperand(1).getGlobal();
+ auto GV1 = Base1Def->getOperand(1).getGlobal();
+ if (GV0 != GV1) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // Can't tell anything about aliasing.
+ return false;
+}
+
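The same-base overlap test above, restated with plain integers so the two diagrams can be checked by hand (accessesOverlap is an illustrative helper, not part of the patch):

#include <cstdint>
#include <cstdio>

// Overlap test for two accesses [Off0, Off0+Size0) and [Off1, Off1+Size1)
// that share a common base register.
static bool accessesOverlap(int64_t Off0, int64_t Size0, int64_t Off1,
                            int64_t Size1) {
  int64_t PtrDiff = Off1 - Off0;
  if (PtrDiff >= 0)
    return !(Size0 <= PtrDiff);       // does the second start inside the first?
  return !((PtrDiff + Size1) <= 0);   // does the first start inside the second?
}

int main() {
  std::printf("%d\n", accessesOverlap(0, 8, 8, 8));  // 0: [0,8) vs [8,16) disjoint
  std::printf("%d\n", accessesOverlap(0, 8, 4, 8));  // 1: [0,8) vs [4,12) overlap
  std::printf("%d\n", accessesOverlap(8, 8, 0, 4));  // 0: [8,16) vs [0,4) disjoint
  std::printf("%d\n", accessesOverlap(8, 8, 0, 12)); // 1: [8,16) vs [0,12) overlap
  return 0;
}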
+bool GISelAddressing::instMayAlias(const MachineInstr &MI,
+ const MachineInstr &Other,
+ MachineRegisterInfo &MRI,
+ AliasAnalysis *AA) {
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ bool IsAtomic;
+ Register BasePtr;
+ int64_t Offset;
+ uint64_t NumBytes;
+ MachineMemOperand *MMO;
+ };
+
+ auto getCharacteristics =
+ [&](const MachineInstr *MI) -> MemUseCharacteristics {
+ if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
+ Register BaseReg;
+ int64_t Offset = 0;
+ // No pre/post-inc addressing modes are considered here, unlike in SDAG.
+ if (!mi_match(LS->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
+ BaseReg = LS->getPointerReg();
+ Offset = 0;
+ }
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ LS->getMMO().getMemoryType().getSizeInBytes());
+ return {LS->isVolatile(), LS->isAtomic(), BaseReg,
+ Offset /*base offset*/, Size, &LS->getMMO()};
+ }
+ // FIXME: support recognizing lifetime instructions.
+ // Default.
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, Register(),
+ (int64_t)0 /*offset*/, 0 /*size*/,
+ (MachineMemOperand *)nullptr};
+ };
+ MemUseCharacteristics MUC0 = getCharacteristics(&MI),
+ MUC1 = getCharacteristics(&Other);
+
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias.
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+ }
+
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // FIXME: port the alignment based alias analysis from SDAG's isAlias().
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ uint64_t Size0 = MUC0.NumBytes;
+ uint64_t Size1 = MUC1.NumBytes;
+ if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0 != MemoryLocation::UnknownSize &&
+ Size1 != MemoryLocation::UnknownSize) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
+ if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ MUC0.MMO->getAAInfo()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ MUC1.MMO->getAAInfo())))
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
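For the alias-analysis query above, both access windows are extended back to the smaller of the two MMO offsets so the two MemoryLocations are measured from a common starting point. A numeric sketch with made-up offsets and sizes:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t SrcValOffset0 = 8, Size0 = 4;  // access 0: 4 bytes at offset 8
  int64_t SrcValOffset1 = 0, Size1 = 16; // access 1: 16 bytes at offset 0
  int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
  int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset; // 12
  int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset; // 16
  std::printf("query AA with sizes %lld and %lld from the common start\n",
              static_cast<long long>(Overlap0),
              static_cast<long long>(Overlap1));
  return 0;
}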
+/// Returns true if the instruction creates an unavoidable hazard that
+/// forces a boundary between store merge candidates.
+static bool isInstHardMergeHazard(MachineInstr &MI) {
+ return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
+}
+
+bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
+ // Try to merge all the stores in the vector, splitting into separate segments
+ // as necessary.
+ assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
+ LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
+ LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ // Ensure the legal store info is computed for this address space.
+ initializeStoreMergeTargetInfo(AS);
+ const auto &LegalSizes = LegalStoreSizes[AS];
+
+#ifndef NDEBUG
+ for (auto StoreMI : StoresToMerge)
+ assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
+#endif
+
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ bool AnyMerged = false;
+ do {
+ unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
+ // Compute the biggest store we can generate to handle the number of stores.
+ unsigned MergeSizeBits;
+ for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
+ LLT StoreTy = LLT::scalar(MergeSizeBits);
+ EVT StoreEVT =
+ getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
+ if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
+ TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
+ (TLI->isTypeLegal(StoreEVT)))
+ break; // We can generate a MergeSize bits store.
+ }
+ if (MergeSizeBits <= OrigTy.getSizeInBits())
+ return AnyMerged; // No greater merge.
+
+ unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
+ // Perform the actual merging.
+ SmallVector<GStore *, 8> SingleMergeStores(
+ StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
+ AnyMerged |= doSingleStoreMerge(SingleMergeStores);
+ StoresToMerge.erase(StoresToMerge.begin(),
+ StoresToMerge.begin() + NumStoresToMerge);
+ } while (StoresToMerge.size() > 1);
+ return AnyMerged;
+}
+
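A sketch of the width selection performed by mergeStores, under an assumed target where only 8/16/32/64-bit scalar stores are legal; the helper names and the numbers are illustrative, not from the patch:

#include <cstdio>

static unsigned powerOf2Floor(unsigned N) {
  unsigned P = 1;
  while (P * 2 <= N)
    P *= 2;
  return P;
}

static bool isLegalStoreSize(unsigned Bits) {
  return Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64; // assumed target
}

int main() {
  const unsigned OrigBits = 16; // each candidate store writes an s16
  unsigned NumStores = 6;       // six adjacent stores are available
  unsigned MergeBits = powerOf2Floor(NumStores) * OrigBits; // 4 * 16 = 64
  while (MergeBits > OrigBits && !isLegalStoreSize(MergeBits))
    MergeBits /= 2;
  std::printf("merge %u stores into one s%u store\n", MergeBits / OrigBits,
              MergeBits); // merge 4 stores into one s64 store
  return 0;
}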
+bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
+ MachineFunction &MF) const {
+ auto Action = LI->getAction(Query).Action;
+ // If the instruction is unsupported, it can't be legalized at all.
+ if (Action == LegalizeActions::Unsupported)
+ return false;
+ return IsPreLegalizer || Action == LegalizeAction::Legal;
+}
+
+bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
+ assert(Stores.size() > 1);
+ // We know that all the stores are consecutive and there are no aliasing
+ // operations in the range. However, the values that are being stored may be
+ // generated anywhere before each store. To ensure we have the values
+ // available, we materialize the wide value and new store at the place of the
+ // final store in the merge sequence.
+ GStore *FirstStore = Stores[0];
+ const unsigned NumStores = Stores.size();
+ LLT SmallTy = MRI->getType(FirstStore->getValueReg());
+ LLT WideValueTy =
+ LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());
+
+ // For each store, compute pairwise merged debug locs.
+ DebugLoc MergedLoc;
+ for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
+ MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
+ Stores[BIdx]->getDebugLoc());
+ Builder.setInstr(*Stores.back());
+ Builder.setDebugLoc(MergedLoc);
+
+ // If all of the store values are constants, then create a wide constant
+ // directly. Otherwise, we need to generate some instructions to merge the
+ // existing values together into a wider type.
+ SmallVector<APInt, 8> ConstantVals;
+ for (auto Store : Stores) {
+ auto MaybeCst =
+ getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
+ if (!MaybeCst) {
+ ConstantVals.clear();
+ break;
+ }
+ ConstantVals.emplace_back(MaybeCst->Value);
+ }
+
+ Register WideReg;
+ auto *WideMMO =
+ MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
+ if (ConstantVals.empty()) {
+ // Mimic the SDAG behaviour here and don't try to do anything for unknown
+    // values. In the future, we should also support the cases of loads and
+ // extracted vector elements.
+ return false;
+ }
+
+ assert(ConstantVals.size() == NumStores);
+ // Check if our wide constant is legal.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
+ return false;
+ APInt WideConst(WideValueTy.getSizeInBits(), 0);
+ for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
+ // Insert the smaller constant into the corresponding position in the
+ // wider one.
+ WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
+ }
+ WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
+ auto NewStore =
+ Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
+ (void) NewStore;
+ LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
+ NumStoresMerged += Stores.size();
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
+ FirstStore->getDebugLoc(),
+ FirstStore->getParent());
+ R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
+ << NV("OrigWidth", SmallTy.getSizeInBytes())
+ << " bytes into a single store of "
+ << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
+ return R;
+ });
+
+ for (auto MI : Stores)
+ InstsToErase.insert(MI);
+ return true;
+}
+
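To illustrate how doSingleStoreMerge packs the per-store constants: the value of store Idx is inserted at bit offset Idx times the small store width, so the lowest-addressed store supplies the least significant bits. The concrete byte layout below assumes a little-endian target; values are made up:

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t Vals[4] = {0x11, 0x22, 0x33, 0x44}; // four adjacent s8 stores
  uint32_t Wide = 0;
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    Wide |= static_cast<uint32_t>(Vals[Idx]) << (Idx * 8);
  std::printf("%#x\n", Wide); // 0x44332211, written once as an s32 store
  return 0;
}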
+bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
+ if (C.Stores.size() < 2) {
+ C.reset();
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
+ << " stores, starting with " << *C.Stores[0]);
+ // We know that the stores in the candidate are adjacent.
+ // Now we need to check if any potential aliasing instructions recorded
+  // during the search alias with loads/stores added to the candidate later.
+ // For example, if we have the candidate:
+ // C.Stores = [ST1, ST2, ST3, ST4]
+ // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
+ // ST2, then we would have recorded it into the PotentialAliases structure
+ // with the associated index value of "1". Then we see ST3 and ST4 and add
+ // them to the candidate group. We know that LD1 does not alias with ST1 or
+ // ST2, since we already did that check. However we don't yet know if it
+ // may alias ST3 and ST4, so we perform those checks now.
+ SmallVector<GStore *> StoresToMerge;
+
+ auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
+ for (auto AliasInfo : reverse(C.PotentialAliases)) {
+ MachineInstr *PotentialAliasOp = AliasInfo.first;
+ unsigned PreCheckedIdx = AliasInfo.second;
+ if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
+ } else {
+ // Once our store index is lower than the index associated with the
+ // potential alias, we know that we've already checked for this alias
+ // and all of the earlier potential aliases too.
+ return false;
+ }
+ }
+ return false;
+ };
+ // Start from the last store in the group, and check if it aliases with any
+ // of the potential aliasing operations in the list.
+ for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
+ auto *CheckStore = C.Stores[StoreIdx];
+ if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
+ continue;
+ StoresToMerge.emplace_back(CheckStore);
+ }
+
+ LLVM_DEBUG(dbgs() << StoresToMerge.size()
+ << " stores remaining after alias checks. Merging...\n");
+
+ // Now we've checked for aliasing hazards, merge any stores left.
+ C.reset();
+ if (StoresToMerge.size() < 2)
+ return false;
+ return mergeStores(StoresToMerge);
+}
+
+bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
+ StoreMergeCandidate &C) {
+ if (C.Stores.empty())
+ return false;
+ return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
+ return instMayAlias(MI, *OtherMI, *MRI, AA);
+ });
+}
+
+void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
+ PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
+}
+
+bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
+ StoreMergeCandidate &C) {
+ // Check if the given store writes to an adjacent address, and other
+ // requirements.
+ LLT ValueTy = MRI->getType(StoreMI.getValueReg());
+ LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
+
+ // Only handle scalars.
+ if (!ValueTy.isScalar())
+ return false;
+
+ // Don't allow truncating stores for now.
+ if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
+ return false;
+
+ Register StoreAddr = StoreMI.getPointerReg();
+ auto BIO = getPointerInfo(StoreAddr, *MRI);
+ Register StoreBase = BIO.BaseReg;
+ uint64_t StoreOffCst = BIO.Offset;
+ if (C.Stores.empty()) {
+ // This is the first store of the candidate.
+ // If the offset can't possibly allow for a lower addressed store with the
+ // same base, don't bother adding it.
+ if (StoreOffCst < ValueTy.getSizeInBytes())
+ return false;
+ C.BasePtr = StoreBase;
+ C.CurrentLowestOffset = StoreOffCst;
+ C.Stores.emplace_back(&StoreMI);
+ LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
+ << StoreMI);
+ return true;
+ }
+
+ // Check the store is the same size as the existing ones in the candidate.
+ if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
+ ValueTy.getSizeInBits())
+ return false;
+
+ if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
+ PtrTy.getAddressSpace())
+ return false;
+
+  // There are other stores in the candidate. Check that this store writes to
+  // the next lowest adjacent address.
+ if (C.BasePtr != StoreBase)
+ return false;
+ if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
+ static_cast<uint64_t>(StoreOffCst))
+ return false;
+
+ // This writes to an adjacent address. Allow it.
+ C.Stores.emplace_back(&StoreMI);
+ C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
+ LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
+ return true;
+}
+
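The adjacency rule enforced above, restated with made-up offsets: walking the block bottom-up, a store joins the candidate only if its constant offset is exactly one store width below the current lowest offset:

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t StoreSize = 4;           // bytes written by each s32 store
  int64_t CurrentLowestOffset = 16;      // offset of the first store seen
  const int64_t Incoming[] = {12, 8, 5}; // offsets of later stores (bottom-up)
  for (int64_t Off : Incoming) {
    bool Adjacent = (CurrentLowestOffset - StoreSize) == Off;
    std::printf("offset %lld: %s\n", static_cast<long long>(Off),
                Adjacent ? "added to candidate" : "rejected");
    if (Adjacent)
      CurrentLowestOffset = Off;
  }
  return 0;
}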
+bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ // Walk through the block bottom-up, looking for merging candidates.
+ StoreMergeCandidate Candidate;
+ for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+ if (InstsToErase.contains(&MI))
+ continue;
+
+ if (auto StoreMI = dyn_cast<GStore>(&*II)) {
+ // We have a G_STORE. Add it to the candidate if it writes to an adjacent
+ // address.
+ if (!addStoreToCandidate(*StoreMI, Candidate)) {
+ // Store wasn't eligible to be added. May need to record it as a
+ // potential alias.
+ if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+ Candidate.addPotentialAlias(*StoreMI);
+ }
+ continue;
+ }
+
+ // If we don't have any stores yet, this instruction can't pose a problem.
+ if (Candidate.Stores.empty())
+ continue;
+
+ // We're dealing with some other kind of instruction.
+ if (isInstHardMergeHazard(MI)) {
+ Changed |= processMergeCandidate(Candidate);
+ Candidate.Stores.clear();
+ continue;
+ }
+
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ if (operationAliasesWithCandidate(MI, Candidate)) {
+ // We have a potential alias, so process the current candidate if we can
+ // and then continue looking for a new candidate.
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+
+ // Record this instruction as a potential alias for future stores that are
+ // added to the candidate.
+ Candidate.addPotentialAlias(MI);
+ }
+
+ // Process any candidate left after finishing searching the entire block.
+ Changed |= processMergeCandidate(Candidate);
+
+ // Erase instructions now that we're no longer iterating over the block.
+ for (auto *MI : InstsToErase)
+ MI->eraseFromParent();
+ InstsToErase.clear();
+ return Changed;
+}
+
+bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &BB : MF) {
+ Changed |= mergeBlockStores(BB);
+ }
+ return Changed;
+}
+
+void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
+ // Query the legalizer info to record what store types are legal.
+ // We record this because we don't want to bother trying to merge stores into
+  // illegal ones, which would just result in them being split again.
+
+ if (LegalStoreSizes.count(AddrSpace)) {
+ assert(LegalStoreSizes[AddrSpace].any());
+ return; // Already cached sizes for this address space.
+ }
+
+ // Need to reserve at least MaxStoreSizeToForm + 1 bits.
+ BitVector LegalSizes(MaxStoreSizeToForm * 2);
+ const auto &LI = *MF->getSubtarget().getLegalizerInfo();
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ Type *IntPtrIRTy =
+ DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ // We assume that we're not going to be generating any stores wider than
+ // MaxStoreSizeToForm bits for now.
+ for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
+ LLT Ty = LLT::scalar(Size);
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ SmallVector<LLT> StoreTys({Ty, PtrTy});
+ LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
+ LegalizeActionStep ActionStep = LI.getAction(Q);
+ if (ActionStep.Action == LegalizeActions::Legal)
+ LegalSizes.set(Size);
+ }
+ assert(LegalSizes.any() && "Expected some store sizes to be legal!");
+ LegalStoreSizes[AddrSpace] = LegalSizes;
+}
+
+bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
+ << '\n');
+
+ init(MF);
+ bool Changed = false;
+ Changed |= mergeFunctionStores(MF);
+
+ LegalStoreSizes.clear();
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index d45fdae43f01..a1acc4195840 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -92,9 +92,8 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
// Check if all the users of MI are local.
     // We are going to invalidate the list of use operands, so we
     // can't use a range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
+ for (MachineOperand &MOUse :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
// Check if the use is already local.
MachineBasicBlock *InsertMBB;
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 54ac62793b08..fb5ed35c1f72 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -673,7 +673,8 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
LLT Src1Ty = Src1.getLLTTy(*getMRI());
LLT Src2Ty = Src2.getLLTTy(*getMRI());
- assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
+ Mask.size());
assert(DstTy.getElementType() == Src1Ty.getElementType() &&
DstTy.getElementType() == Src2Ty.getElementType());
(void)DstTy;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 644a81d8021e..937d94764be1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -699,11 +699,11 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
     // Set a sensible insertion point so that subsequent calls to
     // MIRBuilder insert instructions into the right block.
MIRBuilder.setMBB(*MBB);
- for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
- MII != End;) {
- // MI might be invalidated by the assignment, so move the
- // iterator before hand.
- MachineInstr &MI = *MII++;
+ SmallVector<MachineInstr *> WorkList(
+ make_pointer_range(reverse(MBB->instrs())));
+
+ while (!WorkList.empty()) {
+ MachineInstr &MI = *WorkList.pop_back_val();
// Ignore target-specific post-isel instructions: they should use proper
// regclasses.
@@ -728,18 +728,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
"unable to map instruction", MI);
return false;
}
-
- // It's possible the mapping changed control flow, and moved the following
- // instruction to a new block, so figure out the new parent.
- if (MII != End) {
- MachineBasicBlock *NextInstBB = MII->getParent();
- if (NextInstBB != MBB) {
- LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
- MBB = NextInstBB;
- MIRBuilder.setMBB(*MBB);
- End = MBB->end();
- }
- }
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index e2a963747101..1a2102e3ef21 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -570,7 +570,7 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
assert((ValueMask & PartMapMask) == PartMapMask &&
"Some partial mappings overlap");
}
- assert(ValueMask.isAllOnesValue() && "Value is not fully mapped");
+ assert(ValueMask.isAllOnes() && "Value is not fully mapped");
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f64e41b9dccc..1a440c064a59 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -15,7 +15,9 @@
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -60,6 +62,8 @@ Register llvm::constrainOperandRegClass(
if (ConstrainedReg != Reg) {
MachineBasicBlock::iterator InsertIt(&InsertPt);
MachineBasicBlock &MBB = *InsertPt.getParent();
+ // FIXME: The copy needs to have the classes constrained for its operands.
+ // Use operand's regbank to get the class for old register (Reg).
if (RegMO.isUse()) {
BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
TII.get(TargetOpcode::COPY), ConstrainedReg)
@@ -99,19 +103,25 @@ Register llvm::constrainOperandRegClass(
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
- const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+ const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
// Some of the target independent instructions, like COPY, may not impose any
// register class constraints on some of their operands: If it's a use, we can
// skip constraining as the instruction defining the register would constrain
// it.
- // We can't constrain unallocatable register classes, because we can't create
- // virtual registers for these classes, so we need to let targets handled this
- // case.
- if (RegClass && !RegClass->isAllocatable())
- RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI);
+ if (OpRC) {
+ // Obtain the RC from incoming regbank if it is a proper sub-class. Operands
+ // can have multiple regbanks for a superclass that combine different
+ // register types (E.g., AMDGPU's VGPR and AGPR). The regbank ambiguity
+ // resolved by targets during regbankselect should not be overridden.
+ if (const auto *SubRC = TRI.getCommonSubClass(
+ OpRC, TRI.getConstrainedRegClassForOperand(RegMO, MRI)))
+ OpRC = SubRC;
- if (!RegClass) {
+ OpRC = TRI.getAllocatableClass(OpRC);
+ }
+
+ if (!OpRC) {
assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&
"Register class constraint is required unless either the "
"instruction is target independent or the operand is a use");
@@ -127,7 +137,7 @@ Register llvm::constrainOperandRegClass(
// and they never reach this function.
return Reg;
}
- return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *OpRC,
RegMO);
}
@@ -236,7 +246,7 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity,
R << (" (in function: " + MF.getName() + ")").str();
if (IsFatal)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
else
MORE.emit(R);
}
@@ -267,10 +277,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<APInt> llvm::getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<ValueAndVReg> ValAndVReg =
- getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
+Optional<APInt> llvm::getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
+ VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
"Value found while looking through instrs");
if (!ValAndVReg)
@@ -278,41 +288,27 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg,
return ValAndVReg->Value;
}
-Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
+Optional<int64_t>
+llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
+ Optional<APInt> Val = getIConstantVRegVal(VReg, MRI);
if (Val && Val->getBitWidth() <= 64)
return Val->getSExtValue();
return None;
}
-Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
- Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
- bool HandleFConstant, bool LookThroughAnyExt) {
+namespace {
+
+typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
+typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
+
+Optional<ValueAndVReg> getConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
+ GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
+ bool LookThroughAnyExt = false) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
- auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
- return Opcode == TargetOpcode::G_CONSTANT ||
- (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT);
- };
- auto GetImmediateValue = [HandleFConstant,
- &MRI](const MachineInstr &MI) -> Optional<APInt> {
- const MachineOperand &CstVal = MI.getOperand(1);
- if (!CstVal.isImm() && !CstVal.isCImm() &&
- (!HandleFConstant || !CstVal.isFPImm()))
- return None;
- if (!CstVal.isFPImm()) {
- unsigned BitWidth =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
- : CstVal.getCImm()->getValue();
- assert(Val.getBitWidth() == BitWidth &&
- "Value bitwidth doesn't match definition type");
- return Val;
- }
- return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
- };
- while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
+
+ while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI) &&
LookThroughInstrs) {
switch (MI->getOpcode()) {
case TargetOpcode::G_ANYEXT:
@@ -339,10 +335,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return None;
}
}
- if (!MI || !IsConstantOpcode(MI->getOpcode()))
+ if (!MI || !IsConstantOpcode(MI))
return None;
- Optional<APInt> MaybeVal = GetImmediateValue(*MI);
+ Optional<APInt> MaybeVal = getAPCstValue(MI);
if (!MaybeVal)
return None;
APInt &Val = *MaybeVal;
@@ -365,12 +361,65 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return ValueAndVReg{Val, VReg};
}
-const ConstantInt *llvm::getConstantIntVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- MachineInstr *MI = MRI.getVRegDef(VReg);
- if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
- return nullptr;
- return MI->getOperand(1).getCImm();
+bool isIConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+bool isFConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_FCONSTANT;
+}
+
+bool isAnyConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
+}
+
+Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ return None;
+}
+
+Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ if (CstVal.isFPImm())
+ return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ return None;
+}
+
+} // end anonymous namespace
+
+Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
+ getCImmAsAPInt, LookThroughInstrs);
+}
+
+Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ bool LookThroughAnyExt) {
+ return getConstantVRegValWithLookThrough(
+ VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
+ LookThroughAnyExt);
+}
+
+Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ auto Reg = getConstantVRegValWithLookThrough(
+ VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+ if (!Reg)
+ return None;
+ return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),
+ Reg->VReg};
}
const ConstantFP *
@@ -437,16 +486,16 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI) {
- auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+ auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false);
if (!MaybeOp2Cst)
return None;
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false);
if (!MaybeOp1Cst)
return None;
- const APInt &C1 = *MaybeOp1Cst;
- const APInt &C2 = *MaybeOp2Cst;
+ const APInt &C1 = MaybeOp1Cst->Value;
+ const APInt &C2 = MaybeOp2Cst->Value;
switch (Opcode) {
default:
break;
@@ -543,6 +592,35 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
+Optional<MachineInstr *>
+llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
+ auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ if (!SrcVec1)
+ return None;
+ auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
+ if (!SrcVec2)
+ return None;
+
+ const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
+
+ SmallVector<Register, 16> FoldedElements;
+ for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
+ auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
+ SrcVec2->getSourceReg(Idx), MRI);
+ if (!MaybeCst)
+ return None;
+ auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
+ FoldedElements.emplace_back(FoldedCstReg);
+ }
+ // Create the new vector constant.
+ auto CstVec =
+ MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
+ return &*CstVec;
+}
+
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
@@ -659,7 +737,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI);
if (MaybeOp1Cst) {
switch (Opcode) {
default:
@@ -677,7 +755,7 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI) {
assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
- if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) {
+ if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) {
APFloat DstVal(getFltSemanticForLLT(DstTy));
DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,
APFloat::rmNearestTiesToEven);
@@ -686,6 +764,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
return None;
}
+Optional<SmallVector<unsigned>>
+llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(Src);
+ SmallVector<unsigned> FoldedCTLZs;
+ auto tryFoldScalar = [&](Register R) -> Optional<unsigned> {
+ auto MaybeCst = getIConstantVRegVal(R, MRI);
+ if (!MaybeCst)
+ return None;
+ return MaybeCst->countLeadingZeros();
+ };
+ if (Ty.isVector()) {
+ // Try to constant fold each element.
+ auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BV)
+ return None;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
+ FoldedCTLZs.emplace_back(*MaybeFold);
+ continue;
+ }
+ return None;
+ }
+ return FoldedCTLZs;
+ }
+ if (auto MaybeCst = tryFoldScalar(Src)) {
+ FoldedCTLZs.emplace_back(*MaybeCst);
+ return FoldedCTLZs;
+ }
+ return None;
+}
+
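A per-lane view of the CTLZ folding above for a G_BUILD_VECTOR of constants; ctlz32 is a plain reimplementation used only for illustration, and like APInt::countLeadingZeros it returns the bit width for zero:

#include <cstdint>
#include <cstdio>

static unsigned ctlz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Mask = 0x80000000u; Mask != 0 && (V & Mask) == 0; Mask >>= 1)
    ++N;
  return N;
}

int main() {
  const uint32_t Lanes[4] = {1, 0x80, 0x10000, 0}; // a <4 x s32> build_vector
  for (uint32_t L : Lanes)
    std::printf("ctlz(%#x) = %u\n", L, ctlz32(L)); // 31, 24, 15, 32
  return 0;
}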
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
@@ -707,7 +816,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// shifting the bit off the end is undefined.
// TODO: Constant splat
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
if (*ConstLHS == 1)
return true;
}
@@ -715,7 +824,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
break;
}
case TargetOpcode::G_LSHR: {
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
if (ConstLHS->isSignMask())
return true;
}
@@ -737,7 +846,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// zeros is greater than the truncation amount.
const unsigned BitWidth = Ty.getScalarSizeInBits();
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
return false;
}
@@ -885,53 +994,81 @@ static bool isBuildVectorOp(unsigned Opcode) {
Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
}
-// TODO: Handle mixed undef elements.
-static bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return false;
+namespace {
- const unsigned NumOps = MI.getNumOperands();
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
- return false;
+Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
+ if (!MI)
+ return None;
+
+ if (!isBuildVectorOp(MI->getOpcode()))
+ return None;
+
+ Optional<ValueAndVReg> SplatValAndReg = None;
+ for (MachineOperand &Op : MI->uses()) {
+ Register Element = Op.getReg();
+ auto ElementValAndReg =
+ getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+
+    // If AllowUndef, treat undef as a value that will result in a constant splat.
+ if (!ElementValAndReg) {
+ if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
+ continue;
+ return None;
+ }
+
+ // Record splat value
+ if (!SplatValAndReg)
+ SplatValAndReg = ElementValAndReg;
+
+    // Different constant than the one already recorded, not a constant splat.
+ if (SplatValAndReg->Value != ElementValAndReg->Value)
+ return None;
}
- return true;
+ return SplatValAndReg;
}
+bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef))
+ return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
+ return false;
+}
+
+} // end anonymous namespace
+
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return None;
-
- const unsigned NumOps = MI.getNumOperands();
- Optional<int64_t> Scalar;
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- int64_t ElementValue;
- if (!mi_match(Element, MRI, m_ICst(ElementValue)))
- return None;
- if (!Scalar)
- Scalar = ElementValue;
- else if (*Scalar != ElementValue)
- return None;
- }
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false))
+ return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
+ return None;
+}
- return Scalar;
+Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef))
+ return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return None;
}
bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, 0);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, 0, AllowUndef);
}
bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, -1);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef);
}
Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
@@ -948,6 +1085,36 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}
+bool llvm::isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return true;
+ GBuildVector *BV = dyn_cast<GBuildVector>(&MI);
+ if (!BV)
+ return false;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) ||
+ getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Optional<APInt>
+llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return C->Value;
+ auto MaybeCst = getBuildVectorConstantSplat(MI, MRI);
+ if (!MaybeCst)
+ return None;
+ const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
+ return APInt(ScalarSize, *MaybeCst, true);
+}
+
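A similar hedged usage sketch for isConstantOrConstantSplatVector: it returns the splat as an APInt, so callers can reason about scalars and splat vectors uniformly. The helper name shiftAmountIsInRange is illustrative only; the two utilities it calls are the ones defined above.

// Sketch: check that a shift amount, scalar or splat vector, is a known
// constant strictly smaller than the scalar bit width.
static bool shiftAmountIsInRange(MachineInstr &AmtDef,
                                 const MachineRegisterInfo &MRI) {
  Register AmtReg = AmtDef.getOperand(0).getReg();
  unsigned BitWidth = MRI.getType(AmtReg).getScalarSizeInBits();
  Optional<APInt> Amt = isConstantOrConstantSplatVector(AmtDef, MRI);
  return Amt && Amt->ult(BitWidth);
}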
bool llvm::matchUnaryPredicate(
const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
@@ -1011,3 +1178,59 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
return F.hasOptSize() || F.hasMinSize() ||
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}
+
+/// These artifacts generally don't have any debug users because they don't
+/// directly originate from IR instructions, but instead usually come from
+/// legalization. Skipping the check for debug users improves compile time.
+/// Note that truncates or extends aren't included because they have IR
+/// counterparts which can have debug users after translation.
+static bool shouldSkipDbgValueFor(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_INSERT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver,
+ SmallInstListTy &DeadInstChain) {
+ for (MachineOperand &Op : MI.uses()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DeadInstChain.insert(MRI.getVRegDef(Op.getReg()));
+ }
+ LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ DeadInstChain.remove(&MI);
+ if (shouldSkipDbgValueFor(MI))
+ MI.eraseFromParent();
+ else
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ if (LocObserver)
+ LocObserver->checkpoint(false);
+}
+
+void llvm::eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs,
+ MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ SmallInstListTy DeadInstChain;
+ for (MachineInstr *MI : DeadInstrs)
+ saveUsesAndErase(*MI, MRI, LocObserver, DeadInstChain);
+
+ while (!DeadInstChain.empty()) {
+ MachineInstr *Inst = DeadInstChain.pop_back_val();
+ if (!isTriviallyDead(*Inst, MRI))
+ continue;
+ saveUsesAndErase(*Inst, MRI, LocObserver, DeadInstChain);
+ }
+}
+
+void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ return eraseInstrs({&MI}, MRI, LocObserver);
+}
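Editorial aside (not part of the patch): a brief usage sketch for the deferred-erasure helpers above. The wrapper name replaceAndErase is an assumption; eraseInstr and LostDebugLocObserver are the entities defined or referenced in this hunk, and passing a null observer is permitted because the code above only checkpoints when one is supplied.

// Sketch: replace an instruction's result, then erase it and let eraseInstr
// chase any operand defs that became trivially dead as a result.
static void replaceAndErase(MachineInstr &MI, Register WithReg,
                            MachineRegisterInfo &MRI,
                            LostDebugLocObserver *LocObserver /* may be null */) {
  MRI.replaceRegWith(MI.getOperand(0).getReg(), WithReg);
  eraseInstr(MI, MRI, LocObserver);
}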
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 248ef6c23974..83b8c2d0eacb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -365,7 +365,13 @@ static bool CanGenerateTest(Loop *L, Value *Count) {
return false;
};
- if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
+ // Check if Count is a zext.
+ Value *CountBefZext =
+ isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
+
+ if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
+ !IsCompareZero(ICmp, CountBefZext, 0) &&
+ !IsCompareZero(ICmp, CountBefZext, 1))
return false;
unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 71e91b445d9a..64e1f4351456 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -341,9 +341,8 @@ void InlineSpiller::collectRegsToSpill() {
if (Original == Reg)
return;
- for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
- MachineInstr &MI = *RI++;
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
Register SnipReg = isFullCopyOf(MI, Reg);
if (!isSibling(SnipReg))
continue;
@@ -465,10 +464,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
// Find all spills and copies of VNI.
- for (MachineRegisterInfo::use_instr_nodbg_iterator
- UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
- UI != E; ) {
- MachineInstr &MI = *UI++;
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
if (!MI.isCopy() && !MI.mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
@@ -676,11 +673,7 @@ void InlineSpiller::reMaterializeAll() {
bool anyRemat = false;
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (MachineRegisterInfo::reg_bundle_iterator
- RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
- RegI != E; ) {
- MachineInstr &MI = *RegI++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
// Debug values are not allowed to affect codegen.
if (MI.isDebugValue())
continue;
@@ -928,6 +921,39 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// Update the call site info.
if (MI->isCandidateForCallSiteEntry())
MI->getMF()->moveCallSiteInfo(MI, FoldMI);
+
+ // If we've folded a store into an instruction labelled with debug-info,
+ // record a substitution from the old operand to the memory operand. Handle
+ // the simple common case where operand 0 is the one being folded, plus when
+ // the destination operand is also a tied def. More values could be
+ // substituted / preserved with more analysis.
+ if (MI->peekDebugInstrNum() && Ops[0].second == 0) {
+ // Helper lambda.
+ auto MakeSubstitution = [this,FoldMI,MI,&Ops]() {
+ // Substitute old operand zero to the new instruction's memory operand.
+ unsigned OldOperandNum = Ops[0].second;
+ unsigned NewNum = FoldMI->getDebugInstrNum();
+ unsigned OldNum = MI->getDebugInstrNum();
+ MF.makeDebugValueSubstitution({OldNum, OldOperandNum},
+ {NewNum, MachineFunction::DebugOperandMemNumber});
+ };
+
+ const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
+ if (Ops.size() == 1 && Op0.isDef()) {
+ MakeSubstitution();
+ } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
+ Op0.getReg() == MI->getOperand(1).getReg()) {
+ MakeSubstitution();
+ }
+ } else if (MI->peekDebugInstrNum()) {
+ // This is a debug-labelled instruction, but the operand being folded isn't
+ // at operand zero. Most likely this means it's a load being folded in.
+ // Substitute any register defs from operand zero up to the one being
+ // folded -- past that point, we don't know what the new operand indexes
+ // will be.
+ MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second);
+ }
+
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -1038,57 +1064,53 @@ void InlineSpiller::spillAroundUses(Register Reg) {
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
- for (MachineRegisterInfo::reg_bundle_iterator
- RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
- RegI != E; ) {
- MachineInstr *MI = &*(RegI++);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
// Debug values are not allowed to affect codegen.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
- MachineBasicBlock *MBB = MI->getParent();
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
- buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg);
+ MachineBasicBlock *MBB = MI.getParent();
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI);
+ buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);
MBB->erase(MI);
continue;
}
- assert(!MI->isDebugInstr() && "Did not expect to find a use in debug "
+ assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
"instruction that isn't a DBG_VALUE");
// Ignore copies to/from snippets. We'll delete them.
- if (SnippetCopies.count(MI))
+ if (SnippetCopies.count(&MI))
continue;
// Stack slot accesses may coalesce away.
- if (coalesceStackAccess(MI, Reg))
+ if (coalesceStackAccess(&MI, Reg))
continue;
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
// Check for a sibling copy.
- Register SibReg = isFullCopyOf(*MI, Reg);
+ Register SibReg = isFullCopyOf(MI, Reg);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
- LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI);
- SnippetCopies.insert(MI);
+ LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI);
+ SnippetCopies.insert(&MI);
continue;
}
if (RI.Writes) {
- if (hoistSpillInsideBB(OldLI, *MI)) {
+ if (hoistSpillInsideBB(OldLI, MI)) {
// This COPY is now dead, the value is already in the stack slot.
- MI->getOperand(0).setIsDead();
- DeadDefs.push_back(MI);
+ MI.getOperand(0).setIsDead();
+ DeadDefs.push_back(&MI);
continue;
}
} else {
@@ -1108,7 +1130,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
Register NewVReg = Edit->createFrom(Reg);
if (RI.Reads)
- insertReload(NewVReg, Idx, MI);
+ insertReload(NewVReg, Idx, &MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
@@ -1123,12 +1145,12 @@ void InlineSpiller::spillAroundUses(Register Reg) {
hasLiveDef = true;
}
}
- LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
+ LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n');
// FIXME: Use a second vreg if instruction has no tied ops.
if (RI.Writes)
if (hasLiveDef)
- insertSpill(NewVReg, true, MI);
+ insertSpill(NewVReg, true, &MI);
}
}
@@ -1163,10 +1185,8 @@ void InlineSpiller::spillAll() {
// Finally delete the SnippetCopies.
for (Register Reg : RegsToSpill) {
- for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
- RI != E; ) {
- MachineInstr &MI = *(RI++);
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
LIS.RemoveMachineInstrFromMaps(MI);
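Editorial aside: the InlineSpiller hunks above all apply the same mechanical change, so here is the idiom in isolation as a sketch. The helper name eraseDebugUsesOf is illustrative and not part of the patch; llvm::make_early_inc_range and MRI.reg_instructions are the APIs used above.

// make_early_inc_range advances the underlying iterator before the loop body
// runs, so the body may erase MI without invalidating the loop. This replaces
// the hand-written "MachineInstr &MI = *RI++;" pattern removed above.
static void eraseDebugUsesOf(Register Reg, MachineRegisterInfo &MRI) {
  for (MachineInstr &MI :
       llvm::make_early_inc_range(MRI.reg_instructions(Reg)))
    if (MI.isDebugValue())
      MI.eraseFromParent();
}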
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 24a57cc21c57..5a20580e5479 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -95,7 +95,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
}
private:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 71bfb1d87d66..9fabcfb1f326 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -308,12 +308,12 @@ public:
}
// Multiplying by one is a no-op.
- if (C.isOneValue()) {
+ if (C.isOne()) {
return *this;
}
// Multiplying by zero removes the coefficient B and defines all bits.
- if (C.isNullValue()) {
+ if (C.isZero()) {
ErrorMSBs = 0;
deleteB();
}
@@ -464,7 +464,7 @@ public:
return *this;
}
- if (C.isNullValue())
+ if (C.isZero())
return *this;
// Test if the result will be zero
@@ -571,7 +571,7 @@ public:
bool isProvenEqualTo(const Polynomial &o) {
// Subtract both polynomials and test if it is fully defined and zero.
Polynomial r = *this - o;
- return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());
}
/// Print the polynomial into a stream.
@@ -1131,6 +1131,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
InstructionCost InterleavedCost;
InstructionCost InstructionCost = 0;
+ const TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency;
// Get the interleave factor
unsigned Factor = InterleavedLoad.size();
@@ -1158,8 +1159,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// be expected. Also sum the cost of the Instructions being left dead.
for (auto &I : Is) {
// Compute the old cost
- InstructionCost +=
- TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ InstructionCost += TTI.getInstructionCost(I, CostKind);
// The final SVIs are allowed not to be dead, all uses will be replaced
if (SVIs.find(I) != SVIs.end())
@@ -1212,7 +1212,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
Indices.push_back(i);
InterleavedCost = TTI.getInterleavedMemoryOpCost(
Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
- InsertionPoint->getPointerAddressSpace());
+ InsertionPoint->getPointerAddressSpace(), CostKind);
if (InterleavedCost >= InstructionCost) {
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 55089d3b90d0..808a79d9792a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,8 +453,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
// Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
+ if (CI->arg_size() != 1 || CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37c0b44ea2b2..0d3685d4141c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -25,10 +25,10 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index dc9907058340..a4eb3094612b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -11,114 +11,48 @@
/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
///
/// This pass propagates variable locations between basic blocks, resolving
-/// control flow conflicts between them. The problem is much like SSA
-/// construction, where each DBG_VALUE instruction assigns the *value* that
-/// a variable has, and every instruction where the variable is in scope uses
-/// that variable. The resulting map of instruction-to-value is then translated
-/// into a register (or spill) location for each variable over each instruction.
+/// control flow conflicts between them. The problem is SSA construction, where
+/// each debug instruction assigns the *value* that a variable has, and every
+/// instruction where the variable is in scope uses that variable. The resulting
+/// map of instruction-to-value is then translated into a register (or spill)
+/// location for each variable over each instruction.
///
-/// This pass determines which DBG_VALUE dominates which instructions, or if
-/// none do, where values must be merged (like PHI nodes). The added
-/// complication is that because codegen has already finished, a PHI node may
-/// be needed for a variable location to be correct, but no register or spill
-/// slot merges the necessary values. In these circumstances, the variable
-/// location is dropped.
+/// The primary difference from normal SSA construction is that we cannot
+/// _create_ PHI values that contain variable values. CodeGen has already
+/// completed, and we can't alter it just to make debug-info complete. Thus:
+/// we can identify function positions where we would like a PHI value for a
+/// variable, but must search the MachineFunction to see whether such a PHI is
+/// available. If no such PHI exists, the variable location must be dropped.
///
-/// What makes this analysis non-trivial is loops: we cannot tell in advance
-/// whether a variable location is live throughout a loop, or whether its
-/// location is clobbered (or redefined by another DBG_VALUE), without
-/// exploring all the way through.
-///
-/// To make this simpler we perform two kinds of analysis. First, we identify
+/// To achieve this, we perform two kinds of analysis. First, we identify
/// every value defined by every instruction (ignoring those that only move
-/// another value), then compute a map of which values are available for each
-/// instruction. This is stronger than a reaching-def analysis, as we create
-/// PHI values where other values merge.
-///
-/// Secondly, for each variable, we effectively re-construct SSA using each
-/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the
-/// first analysis from the location they refer to. We can then compute the
-/// dominance frontiers of where a variable has a value, and create PHI nodes
-/// where they merge.
-/// This isn't precisely SSA-construction though, because the function shape
-/// is pre-defined. If a variable location requires a PHI node, but no
-/// PHI for the relevant values is present in the function (as computed by the
-/// first analysis), the location must be dropped.
-///
-/// Once both are complete, we can pass back over all instructions knowing:
-/// * What _value_ each variable should contain, either defined by an
-/// instruction or where control flow merges
-/// * What the location of that value is (if any).
-/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when
-/// a value moves location. After this pass runs, all variable locations within
-/// a block should be specified by DBG_VALUEs within that block, allowing
-/// DbgEntityHistoryCalculator to focus on individual blocks.
-///
-/// This pass is able to go fast because the size of the first
-/// reaching-definition analysis is proportional to the working-set size of
-/// the function, which the compiler tries to keep small. (It's also
-/// proportional to the number of blocks). Additionally, we repeatedly perform
-/// the second reaching-definition analysis with only the variables and blocks
-/// in a single lexical scope, exploiting their locality.
-///
-/// Determining where PHIs happen is trickier with this approach, and it comes
-/// to a head in the major problem for LiveDebugValues: is a value live-through
-/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
-/// facts about a function, however this analysis needs to generate new value
-/// numbers at joins.
-///
-/// To do this, consider a lattice of all definition values, from instructions
-/// and from PHIs. Each PHI is characterised by the RPO number of the block it
-/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
-/// with non-PHI values at the top, and any PHI value in the last block (by RPO
-/// order) at the bottom.
-///
-/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below,
-/// "rank" always refers to the former).
-///
-/// At any join, for each register, we consider:
-/// * All incoming values, and
-/// * The PREVIOUS live-in value at this join.
-/// If all incoming values agree: that's the live-in value. If they do not, the
-/// incoming values are ranked according to the partial order, and the NEXT
-/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
-/// the same rank are ignored as conflicting). If there are no candidate values,
-/// or if the rank of the live-in would be lower than the rank of the current
-/// blocks PHIs, create a new PHI value.
-///
-/// Intuitively: if it's not immediately obvious what value a join should result
-/// in, we iteratively descend from instruction-definitions down through PHI
-/// values, getting closer to the current block each time. If the current block
-/// is a loop head, this ordering is effectively searching outer levels of
-/// loops, to find a value that's live-through the current loop.
+/// another value), then re-compute an SSA-form representation of the
+/// MachineFunction, using value propagation to eliminate any unnecessary
+/// PHI values. This gives us a map of every value computed in the function,
+/// and its location within the register file / stack.
///
-/// If there is no value that's live-through this loop, a PHI is created for
-/// this location instead. We can't use a lower-ranked PHI because by definition
-/// it doesn't dominate the current block. We can't create a PHI value any
-/// earlier, because we risk creating a PHI value at a location where values do
-/// not in fact merge, thus misrepresenting the truth, and not making the true
-/// live-through value for variable locations.
+/// Secondly, for each variable we perform the same analysis, where each debug
+/// instruction is considered a def, and every instruction where the variable
+/// is in lexical scope is considered a use. Value propagation is used again
+/// to eliminate any unnecessary PHIs. This gives us a map of each variable to
+/// the value it should have in a block.
///
-/// This algorithm applies to both calculating the availability of values in
-/// the first analysis, and the location of variables in the second. However
-/// for the second we add an extra dimension of pain: creating a variable
-/// location PHI is only valid if, for each incoming edge,
-/// * There is a value for the variable on the incoming edge, and
-/// * All the edges have that value in the same register.
-/// Or put another way: we can only create a variable-location PHI if there is
-/// a matching machine-location PHI, each input to which is the variables value
-/// in the predecessor block.
+/// Once both are complete, we have two maps for each block:
+/// * Variables to the values they should have,
+/// * Values to the register / spill slot they are located in.
+/// We can then marry up each variable's value with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value. The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
///
-/// To accommodate this difference, each point on the lattice is split in
-/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
-/// have a location determined are "definite" PHIs, and no further work is
-/// needed. Otherwise, a location that all non-backedge predecessors agree
-/// on is picked and propagated as a "proposed" PHI value. If that PHI value
-/// is truly live-through, it'll appear on the loop backedges on the next
-/// dataflow iteration, after which the block live-in moves to be a "definite"
-/// PHI. If it's not truly live-through, the variable value will be downgraded
-/// further as we explore the lattice, or remains "proposed" and is considered
-/// invalid once dataflow completes.
+/// This pass is kept efficient because the size of the first SSA problem
+/// is proportional to the working-set size of the function, which the compiler
+/// tries to keep small. (It's also proportional to the number of blocks).
+/// Additionally, we repeatedly perform the second SSA problem analysis with
+/// only the variables and blocks in a single lexical scope, exploiting their
+/// locality.
///
/// ### Terminology
///
@@ -128,15 +62,13 @@
/// contain the appropriate variable value. A value that is a PHI node is
/// occasionally called an mphi.
///
-/// The first dataflow problem is the "machine value location" problem,
+/// The first SSA problem is the "machine value location" problem,
/// because we're determining which machine locations contain which values.
/// The "locations" are constant: what's unknown is what value they contain.
///
-/// The second dataflow problem (the one for variables) is the "variable value
+/// The second SSA problem (the one for variables) is the "variable value
/// problem", because it's determining what values a variable has, rather than
-/// what location those values are placed in. Unfortunately, it's not that
-/// simple, because producing a PHI value always involves picking a location.
-/// This is an imperfection that we just have to accept, at least for now.
+/// what location those values are placed in.
///
/// TODO:
/// Overlapping fragments
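Editorial illustration (not part of the patch): the rewritten header above describes a final "marry-up" of two per-block maps, variable-to-value and value-to-location. Below is that step reduced to plain standard-library containers; none of these names exist in the pass, and the real maps key on DebugVariable, ValueIDNum and LocIdx rather than strings and ints.

#include <map>
#include <optional>
#include <string>

using Value = int;        // stands in for a ValueIDNum
using Loc = std::string;  // stands in for a machine location (LocIdx)

std::optional<Loc>
locateVariable(const std::map<std::string, Value> &VarToValue,
               const std::map<Value, Loc> &ValueToLoc,
               const std::string &Var) {
  auto V = VarToValue.find(Var);
  if (V == VarToValue.end())
    return std::nullopt;  // variable has no known value in this block
  auto L = ValueToLoc.find(V->second);
  if (L == ValueToLoc.end())
    return std::nullopt;  // value not resident anywhere: location is dropped
  return L->second;       // otherwise a DBG_VALUE can point at this location
}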
@@ -153,9 +85,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -192,16 +125,18 @@
#include <cassert>
#include <cstdint>
#include <functional>
+#include <limits.h>
+#include <limits>
#include <queue>
#include <tuple>
#include <utility>
#include <vector>
-#include <limits.h>
-#include <limits>
+#include "InstrRefBasedImpl.h"
#include "LiveDebugValues.h"
using namespace llvm;
+using namespace LiveDebugValues;
// SSAUpdaterImpl sets DEBUG_TYPE, change it.
#undef DEBUG_TYPE
@@ -213,730 +148,6 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
cl::desc("Act like old LiveDebugValues did"),
cl::init(false));
-namespace {
-
-// The location at which a spilled value resides. It consists of a register and
-// an offset.
-struct SpillLoc {
- unsigned SpillBase;
- StackOffset SpillOffset;
- bool operator==(const SpillLoc &Other) const {
- return std::make_pair(SpillBase, SpillOffset) ==
- std::make_pair(Other.SpillBase, Other.SpillOffset);
- }
- bool operator<(const SpillLoc &Other) const {
- return std::make_tuple(SpillBase, SpillOffset.getFixed(),
- SpillOffset.getScalable()) <
- std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
- Other.SpillOffset.getScalable());
- }
-};
-
-class LocIdx {
- unsigned Location;
-
- // Default constructor is private, initializing to an illegal location number.
- // Use only for "not an entry" elements in IndexedMaps.
- LocIdx() : Location(UINT_MAX) { }
-
-public:
- #define NUM_LOC_BITS 24
- LocIdx(unsigned L) : Location(L) {
- assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
- }
-
- static LocIdx MakeIllegalLoc() {
- return LocIdx();
- }
-
- bool isIllegal() const {
- return Location == UINT_MAX;
- }
-
- uint64_t asU64() const {
- return Location;
- }
-
- bool operator==(unsigned L) const {
- return Location == L;
- }
-
- bool operator==(const LocIdx &L) const {
- return Location == L.Location;
- }
-
- bool operator!=(unsigned L) const {
- return !(*this == L);
- }
-
- bool operator!=(const LocIdx &L) const {
- return !(*this == L);
- }
-
- bool operator<(const LocIdx &Other) const {
- return Location < Other.Location;
- }
-};
-
-class LocIdxToIndexFunctor {
-public:
- using argument_type = LocIdx;
- unsigned operator()(const LocIdx &L) const {
- return L.asU64();
- }
-};
-
-/// Unique identifier for a value defined by an instruction, as a value type.
-/// Casts back and forth to a uint64_t. Probably replacable with something less
-/// bit-constrained. Each value identifies the instruction and machine location
-/// where the value is defined, although there may be no corresponding machine
-/// operand for it (ex: regmasks clobbering values). The instructions are
-/// one-based, and definitions that are PHIs have instruction number zero.
-///
-/// The obvious limits of a 1M block function or 1M instruction blocks are
-/// problematic; but by that point we should probably have bailed out of
-/// trying to analyse the function.
-class ValueIDNum {
- uint64_t BlockNo : 20; /// The block where the def happens.
- uint64_t InstNo : 20; /// The Instruction where the def happens.
- /// One based, is distance from start of block.
- uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens.
-
-public:
- // XXX -- temporarily enabled while the live-in / live-out tables are moved
- // to something more type-y
- ValueIDNum() : BlockNo(0xFFFFF),
- InstNo(0xFFFFF),
- LocNo(0xFFFFFF) { }
-
- ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc)
- : BlockNo(Block), InstNo(Inst), LocNo(Loc) { }
-
- ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc)
- : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { }
-
- uint64_t getBlock() const { return BlockNo; }
- uint64_t getInst() const { return InstNo; }
- uint64_t getLoc() const { return LocNo; }
- bool isPHI() const { return InstNo == 0; }
-
- uint64_t asU64() const {
- uint64_t TmpBlock = BlockNo;
- uint64_t TmpInst = InstNo;
- return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo;
- }
-
- static ValueIDNum fromU64(uint64_t v) {
- uint64_t L = (v & 0x3FFF);
- return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L};
- }
-
- bool operator<(const ValueIDNum &Other) const {
- return asU64() < Other.asU64();
- }
-
- bool operator==(const ValueIDNum &Other) const {
- return std::tie(BlockNo, InstNo, LocNo) ==
- std::tie(Other.BlockNo, Other.InstNo, Other.LocNo);
- }
-
- bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
-
- std::string asString(const std::string &mlocname) const {
- return Twine("Value{bb: ")
- .concat(Twine(BlockNo).concat(
- Twine(", inst: ")
- .concat((InstNo ? Twine(InstNo) : Twine("live-in"))
- .concat(Twine(", loc: ").concat(Twine(mlocname)))
- .concat(Twine("}")))))
- .str();
- }
-
- static ValueIDNum EmptyValue;
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
-/// the the value, and Boolean of whether or not it's indirect.
-class DbgValueProperties {
-public:
- DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
- : DIExpr(DIExpr), Indirect(Indirect) {}
-
- /// Extract properties from an existing DBG_VALUE instruction.
- DbgValueProperties(const MachineInstr &MI) {
- assert(MI.isDebugValue());
- DIExpr = MI.getDebugExpression();
- Indirect = MI.getOperand(1).isImm();
- }
-
- bool operator==(const DbgValueProperties &Other) const {
- return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
- }
-
- bool operator!=(const DbgValueProperties &Other) const {
- return !(*this == Other);
- }
-
- const DIExpression *DIExpr;
- bool Indirect;
-};
-
-/// Tracker for what values are in machine locations. Listens to the Things
-/// being Done by various instructions, and maintains a table of what machine
-/// locations have what values (as defined by a ValueIDNum).
-///
-/// There are potentially a much larger number of machine locations on the
-/// target machine than the actual working-set size of the function. On x86 for
-/// example, we're extremely unlikely to want to track values through control
-/// or debug registers. To avoid doing so, MLocTracker has several layers of
-/// indirection going on, with two kinds of ``location'':
-/// * A LocID uniquely identifies a register or spill location, with a
-/// predictable value.
-/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum.
-/// Whenever a location is def'd or used by a MachineInstr, we automagically
-/// create a new LocIdx for a location, but not otherwise. This ensures we only
-/// account for locations that are actually used or defined. The cost is another
-/// vector lookup (of LocID -> LocIdx) over any other implementation. This is
-/// fairly cheap, and the compiler tries to reduce the working-set at any one
-/// time in the function anyway.
-///
-/// Register mask operands completely blow this out of the water; I've just
-/// piled hacks on top of hacks to get around that.
-class MLocTracker {
-public:
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const TargetLowering &TLI;
-
- /// IndexedMap type, mapping from LocIdx to ValueIDNum.
- using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
-
- /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
- /// packed, entries only exist for locations that are being tracked.
- LocToValueType LocIdxToIDNum;
-
- /// "Map" of machine location IDs (i.e., raw register or spill number) to the
- /// LocIdx key / number for that location. There are always at least as many
- /// as the number of registers on the target -- if the value in the register
- /// is not being tracked, then the LocIdx value will be zero. New entries are
- /// appended if a new spill slot begins being tracked.
- /// This, and the corresponding reverse map persist for the analysis of the
- /// whole function, and is necessarying for decoding various vectors of
- /// values.
- std::vector<LocIdx> LocIDToLocIdx;
-
- /// Inverse map of LocIDToLocIdx.
- IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
-
- /// Unique-ification of spill slots. Used to number them -- their LocID
- /// number is the index in SpillLocs minus one plus NumRegs.
- UniqueVector<SpillLoc> SpillLocs;
-
- // If we discover a new machine location, assign it an mphi with this
- // block number.
- unsigned CurBB;
-
- /// Cached local copy of the number of registers the target has.
- unsigned NumRegs;
-
- /// Collection of register mask operands that have been observed. Second part
- /// of pair indicates the instruction that they happened in. Used to
- /// reconstruct where defs happened if we start tracking a location later
- /// on.
- SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
-
- /// Iterator for locations and the values they contain. Dereferencing
- /// produces a struct/pair containing the LocIdx key for this location,
- /// and a reference to the value currently stored. Simplifies the process
- /// of seeking a particular location.
- class MLocIterator {
- LocToValueType &ValueMap;
- LocIdx Idx;
-
- public:
- class value_type {
- public:
- value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { }
- const LocIdx Idx; /// Read-only index of this location.
- ValueIDNum &Value; /// Reference to the stored value at this location.
- };
-
- MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
- : ValueMap(ValueMap), Idx(Idx) { }
-
- bool operator==(const MLocIterator &Other) const {
- assert(&ValueMap == &Other.ValueMap);
- return Idx == Other.Idx;
- }
-
- bool operator!=(const MLocIterator &Other) const {
- return !(*this == Other);
- }
-
- void operator++() {
- Idx = LocIdx(Idx.asU64() + 1);
- }
-
- value_type operator*() {
- return value_type(Idx, ValueMap[LocIdx(Idx)]);
- }
- };
-
- MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI, const TargetLowering &TLI)
- : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
- LocIdxToIDNum(ValueIDNum::EmptyValue),
- LocIdxToLocID(0) {
- NumRegs = TRI.getNumRegs();
- reset();
- LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
- assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
-
- // Always track SP. This avoids the implicit clobbering caused by regmasks
- // from affectings its values. (LiveDebugValues disbelieves calls and
- // regmasks that claim to clobber SP).
- Register SP = TLI.getStackPointerRegisterToSaveRestore();
- if (SP) {
- unsigned ID = getLocID(SP, false);
- (void)lookupOrTrackRegister(ID);
- }
- }
-
- /// Produce location ID number for indexing LocIDToLocIdx. Takes the register
- /// or spill number, and flag for whether it's a spill or not.
- unsigned getLocID(Register RegOrSpill, bool isSpill) {
- return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id();
- }
-
- /// Accessor for reading the value at Idx.
- ValueIDNum getNumAtPos(LocIdx Idx) const {
- assert(Idx.asU64() < LocIdxToIDNum.size());
- return LocIdxToIDNum[Idx];
- }
-
- unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
-
- /// Reset all locations to contain a PHI value at the designated block. Used
- /// sometimes for actual PHI values, othertimes to indicate the block entry
- /// value (before any more information is known).
- void setMPhis(unsigned NewCurBB) {
- CurBB = NewCurBB;
- for (auto Location : locations())
- Location.Value = {CurBB, 0, Location.Idx};
- }
-
- /// Load values for each location from array of ValueIDNums. Take current
- /// bbnum just in case we read a value from a hitherto untouched register.
- void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
- CurBB = NewCurBB;
- // Iterate over all tracked locations, and load each locations live-in
- // value into our local index.
- for (auto Location : locations())
- Location.Value = Locs[Location.Idx.asU64()];
- }
-
- /// Wipe any un-necessary location records after traversing a block.
- void reset(void) {
- // We could reset all the location values too; however either loadFromArray
- // or setMPhis should be called before this object is re-used. Just
- // clear Masks, they're definitely not needed.
- Masks.clear();
- }
-
- /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
- /// the information in this pass uninterpretable.
- void clear(void) {
- reset();
- LocIDToLocIdx.clear();
- LocIdxToLocID.clear();
- LocIdxToIDNum.clear();
- //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0
- SpillLocs = decltype(SpillLocs)();
-
- LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
- }
-
- /// Set a locaiton to a certain value.
- void setMLoc(LocIdx L, ValueIDNum Num) {
- assert(L.asU64() < LocIdxToIDNum.size());
- LocIdxToIDNum[L] = Num;
- }
-
- /// Create a LocIdx for an untracked register ID. Initialize it to either an
- /// mphi value representing a live-in, or a recent register mask clobber.
- LocIdx trackRegister(unsigned ID) {
- assert(ID != 0);
- LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
- LocIdxToIDNum.grow(NewIdx);
- LocIdxToLocID.grow(NewIdx);
-
- // Default: it's an mphi.
- ValueIDNum ValNum = {CurBB, 0, NewIdx};
- // Was this reg ever touched by a regmask?
- for (const auto &MaskPair : reverse(Masks)) {
- if (MaskPair.first->clobbersPhysReg(ID)) {
- // There was an earlier def we skipped.
- ValNum = {CurBB, MaskPair.second, NewIdx};
- break;
- }
- }
-
- LocIdxToIDNum[NewIdx] = ValNum;
- LocIdxToLocID[NewIdx] = ID;
- return NewIdx;
- }
-
- LocIdx lookupOrTrackRegister(unsigned ID) {
- LocIdx &Index = LocIDToLocIdx[ID];
- if (Index.isIllegal())
- Index = trackRegister(ID);
- return Index;
- }
-
- /// Record a definition of the specified register at the given block / inst.
- /// This doesn't take a ValueIDNum, because the definition and its location
- /// are synonymous.
- void defReg(Register R, unsigned BB, unsigned Inst) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- ValueIDNum ValueID = {BB, Inst, Idx};
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- /// Set a register to a value number. To be used if the value number is
- /// known in advance.
- void setReg(Register R, ValueIDNum ValueID) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- ValueIDNum readReg(Register R) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- return LocIdxToIDNum[Idx];
- }
-
- /// Reset a register value to zero / empty. Needed to replicate the
- /// VarLoc implementation where a copy to/from a register effectively
- /// clears the contents of the source register. (Values can only have one
- /// machine location in VarLocBasedImpl).
- void wipeRegister(Register R) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = LocIDToLocIdx[ID];
- LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
- }
-
- /// Determine the LocIdx of an existing register.
- LocIdx getRegMLoc(Register R) {
- unsigned ID = getLocID(R, false);
- return LocIDToLocIdx[ID];
- }
-
- /// Record a RegMask operand being executed. Defs any register we currently
- /// track, stores a pointer to the mask in case we have to account for it
- /// later.
- void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) {
- // Ensure SP exists, so that we don't override it later.
- Register SP = TLI.getStackPointerRegisterToSaveRestore();
-
- // Def any register we track have that isn't preserved. The regmask
- // terminates the liveness of a register, meaning its value can't be
- // relied upon -- we represent this by giving it a new value.
- for (auto Location : locations()) {
- unsigned ID = LocIdxToLocID[Location.Idx];
- // Don't clobber SP, even if the mask says it's clobbered.
- if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID))
- defReg(ID, CurBB, InstID);
- }
- Masks.push_back(std::make_pair(MO, InstID));
- }
-
- /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
- LocIdx getOrTrackSpillLoc(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0) {
- SpillID = SpillLocs.insert(L);
- unsigned L = getLocID(SpillID, true);
- LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
- LocIdxToIDNum.grow(Idx);
- LocIdxToLocID.grow(Idx);
- LocIDToLocIdx.push_back(Idx);
- LocIdxToLocID[Idx] = L;
- return Idx;
- } else {
- unsigned L = getLocID(SpillID, true);
- LocIdx Idx = LocIDToLocIdx[L];
- return Idx;
- }
- }
-
- /// Set the value stored in a spill slot.
- void setSpill(SpillLoc L, ValueIDNum ValueID) {
- LocIdx Idx = getOrTrackSpillLoc(L);
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- /// Read whatever value is in a spill slot, or None if it isn't tracked.
- Optional<ValueIDNum> readSpill(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0)
- return None;
-
- unsigned LocID = getLocID(SpillID, true);
- LocIdx Idx = LocIDToLocIdx[LocID];
- return LocIdxToIDNum[Idx];
- }
-
- /// Determine the LocIdx of a spill slot. Return None if it previously
- /// hasn't had a value assigned.
- Optional<LocIdx> getSpillMLoc(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0)
- return None;
- unsigned LocNo = getLocID(SpillID, true);
- return LocIDToLocIdx[LocNo];
- }
-
- /// Return true if Idx is a spill machine location.
- bool isSpill(LocIdx Idx) const {
- return LocIdxToLocID[Idx] >= NumRegs;
- }
-
- MLocIterator begin() {
- return MLocIterator(LocIdxToIDNum, 0);
- }
-
- MLocIterator end() {
- return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
- }
-
- /// Return a range over all locations currently tracked.
- iterator_range<MLocIterator> locations() {
- return llvm::make_range(begin(), end());
- }
-
- std::string LocIdxToName(LocIdx Idx) const {
- unsigned ID = LocIdxToLocID[Idx];
- if (ID >= NumRegs)
- return Twine("slot ").concat(Twine(ID - NumRegs)).str();
- else
- return TRI.getRegAsmName(ID).str();
- }
-
- std::string IDAsString(const ValueIDNum &Num) const {
- std::string DefName = LocIdxToName(Num.getLoc());
- return Num.asString(DefName);
- }
-
- LLVM_DUMP_METHOD
- void dump() {
- for (auto Location : locations()) {
- std::string MLocName = LocIdxToName(Location.Value.getLoc());
- std::string DefName = Location.Value.asString(MLocName);
- dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
- }
- }
-
- LLVM_DUMP_METHOD
- void dump_mloc_map() {
- for (auto Location : locations()) {
- std::string foo = LocIdxToName(Location.Idx);
- dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
- }
- }
-
- /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
- /// information in \pProperties, for variable Var. Don't insert it anywhere,
- /// just return the builder for it.
- MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
- const DbgValueProperties &Properties) {
- DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
- Var.getVariable()->getScope(),
- const_cast<DILocation *>(Var.getInlinedAt()));
- auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
-
- const DIExpression *Expr = Properties.DIExpr;
- if (!MLoc) {
- // No location -> DBG_VALUE $noreg
- MIB.addReg(0, RegState::Debug);
- MIB.addReg(0, RegState::Debug);
- } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
- unsigned LocID = LocIdxToLocID[*MLoc];
- const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1];
-
- auto *TRI = MF.getSubtarget().getRegisterInfo();
- Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset,
- Spill.SpillOffset);
- unsigned Base = Spill.SpillBase;
- MIB.addReg(Base, RegState::Debug);
- MIB.addImm(0);
- } else {
- unsigned LocID = LocIdxToLocID[*MLoc];
- MIB.addReg(LocID, RegState::Debug);
- if (Properties.Indirect)
- MIB.addImm(0);
- else
- MIB.addReg(0, RegState::Debug);
- }
-
- MIB.addMetadata(Var.getVariable());
- MIB.addMetadata(Expr);
- return MIB;
- }
-};
-
-/// Class recording the (high level) _value_ of a variable. Identifies either
-/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
-/// This class also stores meta-information about how the value is qualified.
-/// Used to reason about variable values when performing the second
-/// (DebugVariable specific) dataflow analysis.
-class DbgValue {
-public:
- union {
- /// If Kind is Def, the value number that this value is based on.
- ValueIDNum ID;
- /// If Kind is Const, the MachineOperand defining this value.
- MachineOperand MO;
- /// For a NoVal DbgValue, which block it was generated in.
- unsigned BlockNo;
- };
- /// Qualifiers for the ValueIDNum above.
- DbgValueProperties Properties;
-
- typedef enum {
- Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
- Def, // This value is defined by an inst, or is a PHI value.
- Const, // A constant value contained in the MachineOperand field.
- Proposed, // This is a tentative PHI value, which may be confirmed or
- // invalidated later.
- NoVal // Empty DbgValue, generated during dataflow. BlockNo stores
- // which block this was generated in.
- } KindT;
- /// Discriminator for whether this is a constant or an in-program value.
- KindT Kind;
-
- DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
- : ID(Val), Properties(Prop), Kind(Kind) {
- assert(Kind == Def || Kind == Proposed);
- }
-
- DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
- : BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
- assert(Kind == NoVal);
- }
-
- DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
- : MO(MO), Properties(Prop), Kind(Kind) {
- assert(Kind == Const);
- }
-
- DbgValue(const DbgValueProperties &Prop, KindT Kind)
- : Properties(Prop), Kind(Kind) {
- assert(Kind == Undef &&
- "Empty DbgValue constructor must pass in Undef kind");
- }
-
- void dump(const MLocTracker *MTrack) const {
- if (Kind == Const) {
- MO.dump();
- } else if (Kind == NoVal) {
- dbgs() << "NoVal(" << BlockNo << ")";
- } else if (Kind == Proposed) {
- dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")";
- } else {
- assert(Kind == Def);
- dbgs() << MTrack->IDAsString(ID);
- }
- if (Properties.Indirect)
- dbgs() << " indir";
- if (Properties.DIExpr)
- dbgs() << " " << *Properties.DIExpr;
- }
-
- bool operator==(const DbgValue &Other) const {
- if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
- return false;
- else if (Kind == Proposed && ID != Other.ID)
- return false;
- else if (Kind == Def && ID != Other.ID)
- return false;
- else if (Kind == NoVal && BlockNo != Other.BlockNo)
- return false;
- else if (Kind == Const)
- return MO.isIdenticalTo(Other.MO);
-
- return true;
- }
-
- bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
-};
-
-/// Types for recording sets of variable fragments that overlap. For a given
-/// local variable, we record all other fragments of that variable that could
-/// overlap it, to reduce search time.
-using FragmentOfVar =
- std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
-using OverlapMap =
- DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
-
-/// Collection of DBG_VALUEs observed when traversing a block. Records each
-/// variable and the value the DBG_VALUE refers to. Requires the machine value
-/// location dataflow algorithm to have run already, so that values can be
-/// identified.
-class VLocTracker {
-public:
- /// Map DebugVariable to the latest Value it's defined to have.
- /// Needs to be a MapVector because we determine order-in-the-input-MIR from
- /// the order in this container.
- /// We only retain the last DbgValue in each block for each variable, to
- /// determine the blocks live-out variable value. The Vars container forms the
- /// transfer function for this block, as part of the dataflow analysis. The
- /// movement of values between locations inside of a block is handled at a
- /// much later stage, in the TransferTracker class.
- MapVector<DebugVariable, DbgValue> Vars;
- DenseMap<DebugVariable, const DILocation *> Scopes;
- MachineBasicBlock *MBB;
-
-public:
- VLocTracker() {}
-
- void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
- Optional<ValueIDNum> ID) {
- assert(MI.isDebugValue() || MI.isDebugRef());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
- : DbgValue(Properties, DbgValue::Undef);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
- }
-
- void defVar(const MachineInstr &MI, const MachineOperand &MO) {
- // Only DBG_VALUEs can define constant-valued variables.
- assert(MI.isDebugValue());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValueProperties Properties(MI);
- DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
- }
-};
-
/// Tracker for converting machine value locations and variable values into
/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
/// specifying block live-in locations and transfers within blocks.
@@ -985,12 +196,12 @@ public:
/// between TransferTrackers view of variable locations and MLocTrackers. For
/// example, MLocTracker observes all clobbers, but TransferTracker lazily
/// does not.
- std::vector<ValueIDNum> VarLocs;
+ SmallVector<ValueIDNum, 32> VarLocs;
/// Map from LocIdxes to which DebugVariables are based in that location.
/// Maintained while stepping through the block. Not accurate if
/// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
- std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+ DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
/// Map from DebugVariable to its current location and qualifying meta
/// information. To be used in conjunction with ActiveMLocs to construct
@@ -1062,6 +273,8 @@ public:
// Map of the preferred location for each value.
std::map<ValueIDNum, LocIdx> ValueToLoc;
+ ActiveMLocs.reserve(VLocs.size());
+ ActiveVLocs.reserve(VLocs.size());
// Produce a map of value numbers to the current machine locs they live
// in. When emulating VarLocBasedImpl, there should only be one
@@ -1088,7 +301,7 @@ public:
for (auto Var : VLocs) {
if (Var.second.Kind == DbgValue::Const) {
PendingDbgValues.push_back(
- emitMOLoc(Var.second.MO, Var.first, Var.second.Properties));
+ emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties));
continue;
}
@@ -1142,7 +355,7 @@ public:
// instruction or similar with an instruction number, where it doesn't
// actually define a new value, instead it moves a value. In case this
// happens, discard.
- if (MTracker->LocIdxToIDNum[L] != Use.ID)
+ if (MTracker->readMLoc(L) != Use.ID)
continue;
// If a different debug instruction defined the variable value / location
@@ -1220,7 +433,6 @@ public:
DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue);
Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
MachineOperand MO = MachineOperand::CreateReg(Reg, false);
- MO.setIsDebug(true);
PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect}));
return true;
@@ -1274,12 +486,12 @@ public:
// Check whether our local copy of values-by-location in #VarLocs is out of
// date. Wipe old tracking data for the location if it's been clobbered in
// the meantime.
- if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
for (auto &P : ActiveMLocs[NewLoc]) {
ActiveVLocs.erase(P);
}
ActiveMLocs[NewLoc.asU64()].clear();
- VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc);
+ VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
}
ActiveMLocs[NewLoc].insert(Var);
@@ -1358,6 +570,8 @@ public:
flushDbgValues(Pos, nullptr);
+ // Re-find ActiveMLocIt; the iterator could have been invalidated.
+ ActiveMLocIt = ActiveMLocs.find(MLoc);
ActiveMLocIt->second.clear();
}
@@ -1367,21 +581,23 @@ public:
void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {
// Does Src still contain the value num we expect? If not, it's been
// clobbered in the meantime, and our variable locations are stale.
- if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src))
+ if (VarLocs[Src.asU64()] != MTracker->readMLoc(Src))
return;
// assert(ActiveMLocs[Dst].size() == 0);
//^^^ Legitimate scenario on account of un-clobbered slot being assigned to?
- ActiveMLocs[Dst] = ActiveMLocs[Src];
+
+ // Move set of active variables from one location to another.
+ auto MovingVars = ActiveMLocs[Src];
+ ActiveMLocs[Dst] = MovingVars;
VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
// For each variable based on Src; create a location at Dst.
- for (auto &Var : ActiveMLocs[Src]) {
+ for (auto &Var : MovingVars) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
assert(ActiveVLocIt != ActiveVLocs.end());
ActiveVLocIt->second.Loc = Dst;
- assert(Dst != 0);
MachineInstr *MI =
MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);
PendingDbgValues.push_back(MI);
@@ -1413,306 +629,245 @@ public:
}
};
-class InstrRefBasedLDV : public LDVImpl {
-private:
- using FragmentInfo = DIExpression::FragmentInfo;
- using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
-
- // Helper while building OverlapMap, a map of all fragments seen for a given
- // DILocalVariable.
- using VarToFragments =
- DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
-
- /// Machine location/value transfer function, a mapping of which locations
- /// are assigned which new values.
- using MLocTransferMap = std::map<LocIdx, ValueIDNum>;
-
- /// Live in/out structure for the variable values: a per-block map of
- /// variables to their values. XXX, better name?
- using LiveIdxT =
- DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>;
-
- using VarAndLoc = std::pair<DebugVariable, DbgValue>;
-
- /// Type for a live-in value: the predecessor block, and its value.
- using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
-
- /// Vector (per block) of a collection (inner smallvector) of live-ins.
- /// Used as the result type for the variable value dataflow problem.
- using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
-
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const TargetFrameLowering *TFI;
- const MachineFrameInfo *MFI;
- BitVector CalleeSavedRegs;
- LexicalScopes LS;
- TargetPassConfig *TPC;
-
- /// Object to track machine locations as we step through a block. Could
- /// probably be a field rather than a pointer, as it's always used.
- MLocTracker *MTracker;
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
- /// Number of the current block LiveDebugValues is stepping through.
- unsigned CurBB;
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1};
- /// Number of the current instruction LiveDebugValues is evaluating.
- unsigned CurInst;
+#ifndef NDEBUG
+void DbgValue::dump(const MLocTracker *MTrack) const {
+ if (Kind == Const) {
+ MO->dump();
+ } else if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == VPHI) {
+ dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")";
+ } else {
+ assert(Kind == Def);
+ dbgs() << MTrack->IDAsString(ID);
+ }
+ if (Properties.Indirect)
+ dbgs() << " indir";
+ if (Properties.DIExpr)
+ dbgs() << " " << *Properties.DIExpr;
+}
+#endif
- /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
- /// steps through a block. Reads the values at each location from the
- /// MLocTracker object.
- VLocTracker *VTracker;
+MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const TargetLowering &TLI)
+ : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+ LocIdxToIDNum(ValueIDNum::EmptyValue), LocIdxToLocID(0) {
+ NumRegs = TRI.getNumRegs();
+ reset();
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+ // Always track SP. This avoids the implicit clobbering caused by regmasks
+ // from affecting its values. (LiveDebugValues disbelieves calls and
+ // regmasks that claim to clobber SP).
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (SP) {
+ unsigned ID = getLocID(SP);
+ (void)lookupOrTrackRegister(ID);
+
+ for (MCRegAliasIterator RAI(SP, &TRI, true); RAI.isValid(); ++RAI)
+ SPAliases.insert(*RAI);
+ }
+
+ // Build some common stack positions -- full registers being spilt to the
+ // stack.
+ StackSlotIdxes.insert({{8, 0}, 0});
+ StackSlotIdxes.insert({{16, 0}, 1});
+ StackSlotIdxes.insert({{32, 0}, 2});
+ StackSlotIdxes.insert({{64, 0}, 3});
+ StackSlotIdxes.insert({{128, 0}, 4});
+ StackSlotIdxes.insert({{256, 0}, 5});
+ StackSlotIdxes.insert({{512, 0}, 6});
+
+ // Traverse all the subregister idxes, and ensure there's an index for them.
+ // Duplicates are no problem: we're interested in their position in the
+ // stack slot, we don't want to type the slot.
+ for (unsigned int I = 1; I < TRI.getNumSubRegIndices(); ++I) {
+ unsigned Size = TRI.getSubRegIdxSize(I);
+ unsigned Offs = TRI.getSubRegIdxOffset(I);
+ unsigned Idx = StackSlotIdxes.size();
+
+ // Some subregs have -1, -2 and so forth fed into their fields, to mean
+ // special backend things. Ignore those.
+ if (Size > 60000 || Offs > 60000)
+ continue;
- /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
- /// between locations during stepping, creates new DBG_VALUEs when values move
- /// location.
- TransferTracker *TTracker;
+ StackSlotIdxes.insert({{Size, Offs}, Idx});
+ }
- /// Blocks which are artificial, i.e. blocks which exclusively contain
- /// instructions without DebugLocs, or with line 0 locations.
- SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+ for (auto &Idx : StackSlotIdxes)
+ StackIdxesToPos[Idx.second] = Idx.first;
- // Mapping of blocks to and from their RPOT order.
- DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
- DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
- DenseMap<unsigned, unsigned> BBNumToRPO;
+ NumSlotIdxes = StackSlotIdxes.size();
+}
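As a minimal standalone sketch of the bookkeeping the constructor sets up (the ToyStackSlotPos and getOrAddSlotIdx names are hypothetical, nothing is taken from the real MLocTracker API), each distinct {size-in-bits, offset-in-bits} position within a spill slot collapses onto a small dense index, and duplicates reuse the existing one:

#include <map>
#include <utility>

// Toy model of the StackSlotIdxes map: {size, offset} in bits -> dense index.
using ToyStackSlotPos = std::pair<unsigned, unsigned>;

unsigned getOrAddSlotIdx(std::map<ToyStackSlotPos, unsigned> &Idxes,
                         unsigned SizeBits, unsigned OffsBits) {
  // insert() is a no-op when the position is already present, so repeated
  // subregister positions share a single index, as the loop above relies on.
  auto Result = Idxes.insert({{SizeBits, OffsBits}, (unsigned)Idxes.size()});
  return Result.first->second;
}

With the seven positions seeded above, {64, 0} lands on index 3, while a position such as {32, 32} only receives an index if some subregister of the target introduces it.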
- /// Pair of MachineInstr, and its 1-based offset into the containing block.
- using InstAndNum = std::pair<const MachineInstr *, unsigned>;
- /// Map from debug instruction number to the MachineInstr labelled with that
- /// number, and its location within the function. Used to transform
- /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
- std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+LocIdx MLocTracker::trackRegister(unsigned ID) {
+ assert(ID != 0);
+ LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+ LocIdxToIDNum.grow(NewIdx);
+ LocIdxToLocID.grow(NewIdx);
+
+ // Default: it's an mphi.
+ ValueIDNum ValNum = {CurBB, 0, NewIdx};
+ // Was this reg ever touched by a regmask?
+ for (const auto &MaskPair : reverse(Masks)) {
+ if (MaskPair.first->clobbersPhysReg(ID)) {
+ // There was an earlier def we skipped.
+ ValNum = {CurBB, MaskPair.second, NewIdx};
+ break;
+ }
+ }
- /// Record of where we observed a DBG_PHI instruction.
- class DebugPHIRecord {
- public:
- uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
- MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
- ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
- LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+ LocIdxToIDNum[NewIdx] = ValNum;
+ LocIdxToLocID[NewIdx] = ID;
+ return NewIdx;
+}
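A rough model of the value-numbering convention used here, assuming only what the surrounding code shows (the real ValueIDNum packs these fields into bit-fields): an instruction number of zero denotes the location's live-in value for the block, the "mphi" default that a newly tracked register receives unless an earlier regmask already clobbered it.

// Toy stand-in for ValueIDNum: which (block, instruction) defined a value,
// and in which machine location the definition happened.
struct ToyValueID {
  unsigned Block;
  unsigned Inst; // 0 means "live-in to Block", i.e. a machine-location PHI.
  unsigned Loc;
  bool isPHI() const { return Inst == 0; }
};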
- operator unsigned() const { return InstrNum; }
- };
+void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
+ unsigned InstID) {
+ // Def any register we track that isn't preserved. The regmask
+ // terminates the liveness of a register, meaning its value can't be
+ // relied upon -- we represent this by giving it a new value.
+ for (auto Location : locations()) {
+ unsigned ID = LocIdxToLocID[Location.Idx];
+ // Don't clobber SP, even if the mask says it's clobbered.
+ if (ID < NumRegs && !SPAliases.count(ID) && MO->clobbersPhysReg(ID))
+ defReg(ID, CurBB, InstID);
+ }
+ Masks.push_back(std::make_pair(MO, InstID));
+}
- /// Map from instruction numbers defined by DBG_PHIs to a record of what that
- /// DBG_PHI read and where. Populated and edited during the machine value
- /// location problem -- we use LLVMs SSA Updater to fix changes by
- /// optimizations that destroy PHI instructions.
- SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
-
- // Map of overlapping variable fragments.
- OverlapMap OverlapFragments;
- VarToFragments SeenFragments;
-
- /// Tests whether this instruction is a spill to a stack slot.
- bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
-
- /// Decide if @MI is a spill instruction and return true if it is. We use 2
- /// criteria to make this decision:
- /// - Is this instruction a store to a spill slot?
- /// - Is there a register operand that is both used and killed?
- /// TODO: Store optimization can fold spills into other stores (including
- /// other spills). We do not handle this yet (more than one memory operand).
- bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
- unsigned &Reg);
-
- /// If a given instruction is identified as a spill, return the spill slot
- /// and set \p Reg to the spilled register.
- Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg);
-
- /// Given a spill instruction, extract the register and offset used to
- /// address the spill slot in a target independent way.
- SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
-
- /// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
- ValueIDNum **MLiveIns = nullptr);
-
- /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferDebugValue(const MachineInstr &MI);
-
- /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns);
-
- /// Stores value-information about where this PHI occurred, and what
- /// instruction number is associated with it.
- /// \returns true if MI was recognized and processed.
- bool transferDebugPHI(MachineInstr &MI);
-
- /// Examines whether \p MI is copy instruction, and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferRegisterCopy(MachineInstr &MI);
-
- /// Examines whether \p MI is stack spill or restore instruction, and
- /// notifies trackers. \returns true if MI was recognized and processed.
- bool transferSpillOrRestoreInst(MachineInstr &MI);
-
- /// Examines \p MI for any registers that it defines, and notifies trackers.
- void transferRegisterDef(MachineInstr &MI);
-
- /// Copy one location to the other, accounting for movement of subregisters
- /// too.
- void performCopy(Register Src, Register Dst);
-
- void accumulateFragmentMap(MachineInstr &MI);
-
- /// Determine the machine value number referred to by (potentially several)
- /// DBG_PHI instructions. Block duplication and tail folding can duplicate
- /// DBG_PHIs, shifting the position where values in registers merge, and
- /// forming another mini-ssa problem to solve.
- /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
- /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
- /// \returns The machine value number at position Here, or None.
- Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns, MachineInstr &Here,
- uint64_t InstrNum);
-
- /// Step through the function, recording register definitions and movements
- /// in an MLocTracker. Convert the observations into a per-block transfer
- /// function in \p MLocTransfer, suitable for using with the machine value
- /// location dataflow problem.
- void
- produceMLocTransferFunction(MachineFunction &MF,
- SmallVectorImpl<MLocTransferMap> &MLocTransfer,
- unsigned MaxNumBlocks);
-
- /// Solve the machine value location dataflow problem. Takes as input the
- /// transfer functions in \p MLocTransfer. Writes the output live-in and
- /// live-out arrays to the (initialized to zero) multidimensional arrays in
- /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
- /// number, the inner by LocIdx.
- void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
- SmallVectorImpl<MLocTransferMap> &MLocTransfer);
-
- /// Perform a control flow join (lattice value meet) of the values in machine
- /// locations at \p MBB. Follows the algorithm described in the file-comment,
- /// reading live-outs of predecessors from \p OutLocs, the current live ins
- /// from \p InLocs, and assigning the newly computed live ins back into
- /// \p InLocs. \returns two bools -- the first indicates whether a change
- /// was made, the second whether a lattice downgrade occurred. If the latter
- /// is true, revisiting this block is necessary.
- std::tuple<bool, bool>
- mlocJoin(MachineBasicBlock &MBB,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs);
-
- /// Solve the variable value dataflow problem, for a single lexical scope.
- /// Uses the algorithm from the file comment to resolve control flow joins,
- /// although there are extra hacks, see vlocJoin. Reads the
- /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
- /// mlocDataflow) and reads the variable values transfer function from
- /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
- /// with the live-ins permanently stored to \p Output once the fixedpoint is
- /// reached.
- /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
- /// that we should be tracking.
- /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but
- /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations
- /// through.
- void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
- SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- SmallVectorImpl<VLocTracker> &AllTheVLocs);
-
- /// Compute the live-ins to a block, considering control flow merges according
- /// to the method in the file comment. Live out and live in variable values
- /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB
- /// are computed and stored into \p VLOCInLocs. \returns true if the live-ins
- /// are modified.
- /// \p InLocsT Output argument, storage for calculated live-ins.
- /// \returns two bools -- the first indicates whether a change
- /// was made, the second whether a lattice downgrade occurred. If the latter
- /// is true, revisiting this block is necessary.
- std::tuple<bool, bool>
- vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
- SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited,
- unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
- SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
- DenseMap<DebugVariable, DbgValue> &InLocsT);
-
- /// Continue exploration of the variable-value lattice, as explained in the
- /// file-level comment. \p OldLiveInLocation contains the current
- /// exploration position, from which we need to descend further. \p Values
- /// contains the set of live-in values, \p CurBlockRPONum the RPO number of
- /// the current block, and \p CandidateLocations a set of locations that
- /// should be considered as PHI locations, if we reach the bottom of the
- /// lattice. \returns true if we should downgrade; the value is the agreeing
- /// value number in a non-backedge predecessor.
- bool vlocDowngradeLattice(const MachineBasicBlock &MBB,
- const DbgValue &OldLiveInLocation,
- const SmallVectorImpl<InValueT> &Values,
- unsigned CurBlockRPONum);
-
- /// For the given block and live-outs feeding into it, try to find a
- /// machine location where they all join. If a solution for all predecessors
- /// can't be found, a location where all non-backedge-predecessors join
- /// will be returned instead. While this method finds a join location, this
- /// says nothing as to whether it should be used.
- /// \returns Pair of value ID if found, and true when the correct value
- /// is available on all predecessor edges, or false if it's only available
- /// for non-backedge predecessors.
- std::tuple<Optional<ValueIDNum>, bool>
- pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- const SmallVectorImpl<MachineBasicBlock *> &BlockOrders);
-
- /// Given the solutions to the two dataflow problems, machine value locations
- /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
- /// TransferTracker class over the function to produce live-in and transfer
- /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
- /// order given by AllVarsNumbering -- this could be any stable order, but
- /// right now "order of appearence in function, when explored in RPO", so
- /// that we can compare explictly against VarLocBasedImpl.
- void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
- const TargetPassConfig &TPC);
-
- /// Boilerplate computation of some initial sets, artifical blocks and
- /// RPOT block ordering.
- void initialSetup(MachineFunction &MF);
-
- bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
+ SpillLocationNo SpillID(SpillLocs.idFor(L));
+ if (SpillID.id() == 0) {
+ // Spill location is untracked: create record for this one, and all
+ // subregister slots too.
+ SpillID = SpillLocationNo(SpillLocs.insert(L));
+ for (unsigned StackIdx = 0; StackIdx < NumSlotIdxes; ++StackIdx) {
+ unsigned L = getSpillIDWithIdx(SpillID, StackIdx);
+ LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
+ LocIdxToIDNum.grow(Idx);
+ LocIdxToLocID.grow(Idx);
+ LocIDToLocIdx.push_back(Idx);
+ LocIdxToLocID[Idx] = L;
+ // Initialize to PHI value; corresponds to the location's live-in value
+ // during transfer function construction.
+ LocIdxToIDNum[Idx] = ValueIDNum(CurBB, 0, Idx);
+ }
+ }
+ return SpillID;
+}
-public:
- /// Default construct and initialize the pass.
- InstrRefBasedLDV();
+std::string MLocTracker::LocIdxToName(LocIdx Idx) const {
+ unsigned ID = LocIdxToLocID[Idx];
+ if (ID >= NumRegs) {
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ ID -= NumRegs;
+ unsigned Slot = ID / NumSlotIdxes;
+ return Twine("slot ")
+ .concat(Twine(Slot).concat(Twine(" sz ").concat(Twine(Pos.first)
+ .concat(Twine(" offs ").concat(Twine(Pos.second))))))
+ .str();
+ } else {
+ return TRI.getRegAsmName(ID).str();
+ }
+}
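// Example output, purely illustrative: a plain register renders as its asm
// name (e.g. "rax"), while a spill position renders in the form
// "slot <n> sz <size> offs <offset>", e.g. "slot 2 sz 32 offs 0" for the
// 32-bit, offset-zero position of one of the tracked slots.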
- LLVM_DUMP_METHOD
- void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+std::string MLocTracker::IDAsString(const ValueIDNum &Num) const {
+ std::string DefName = LocIdxToName(Num.getLoc());
+ return Num.asString(DefName);
+}
- bool isCalleeSaved(LocIdx L) {
- unsigned Reg = MTracker->LocIdxToLocID[L];
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- if (CalleeSavedRegs.test(*RAI))
- return true;
- return false;
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void MLocTracker::dump() {
+ for (auto Location : locations()) {
+ std::string MLocName = LocIdxToName(Location.Value.getLoc());
+ std::string DefName = Location.Value.asString(MLocName);
+ dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
}
-};
+}
-} // end anonymous namespace
+LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() {
+ for (auto Location : locations()) {
+ std::string foo = LocIdxToName(Location.Idx);
+ dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
+ }
+}
+#endif
-//===----------------------------------------------------------------------===//
-// Implementation
-//===----------------------------------------------------------------------===//
+MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
+
+ const DIExpression *Expr = Properties.DIExpr;
+ if (!MLoc) {
+ // No location -> DBG_VALUE $noreg
+ MIB.addReg(0);
+ MIB.addReg(0);
+ } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ SpillLocationNo SpillID = locIDToSpill(LocID);
+ StackSlotPos StackIdx = locIDToSpillIdx(LocID);
+ unsigned short Offset = StackIdx.second;
+
+ // TODO: support variables that are located in spill slots, with non-zero
+ // offsets from the start of the spill slot. It would require some more
+ // complex DIExpression calculations. This doesn't seem to be produced by
+ // LLVM right now, so don't try and support it.
+ // Accept no-subregister slots and subregisters where the offset is zero.
+ // The consumer should already have type information to work out how large
+ // the variable is.
+ if (Offset == 0) {
+ const SpillLoc &Spill = SpillLocs[SpillID.id()];
+ Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ unsigned Base = Spill.SpillBase;
+ MIB.addReg(Base);
+ MIB.addImm(0);
+ } else {
+ // This is a stack location with a weird subregister offset: emit an undef
+ // DBG_VALUE instead.
+ MIB.addReg(0);
+ MIB.addReg(0);
+ }
+ } else {
+ // Non-empty, non-stack slot, must be a plain register.
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ MIB.addReg(LocID);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0);
+ }
-ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Expr);
+ return MIB;
+}
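// The three shapes produced above correspond roughly to (x86-64 register
// names used purely for illustration):
//   DBG_VALUE $noreg, $noreg, !var, !expr      -- no location / odd stack offset
//   DBG_VALUE $rbp, 0, !var, !(expr + offset)  -- value at the base of a spill slot
//   DBG_VALUE $rax, $noreg, !var, !expr        -- value live in a register
// with the second operand switching between $noreg and an immediate to mark
// direct versus indirect values.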
/// Default construct and initialize the pass.
InstrRefBasedLDV::InstrRefBasedLDV() {}
+bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -1722,7 +877,7 @@ InstrRefBasedLDV::InstrRefBasedLDV() {}
// void InstrRefBasedLDV::printVarLocInMBB(..)
#endif
-SpillLoc
+SpillLocationNo
InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
@@ -1734,7 +889,28 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
const MachineBasicBlock *MBB = MI.getParent();
Register Reg;
StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
- return {Reg, Offset};
+ return MTracker->getOrTrackSpillLoc({Reg, Offset});
+}
+
+Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+ SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI);
+
+ // Where in the stack slot is this value defined -- i.e., what size of value
+ // is this? An important question, because it could be loaded into a register
+ // from the stack at some point. Happily the memory operand will tell us
+ // the size written to the stack.
+ auto *MemOperand = *MI.memoperands_begin();
+ unsigned SizeInBits = MemOperand->getSizeInBits();
+
+ // Find that position in the stack indexes we're tracking.
+ auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+ if (IdxIt == MTracker->StackSlotIdxes.end())
+ // That index is not tracked. This is surprising, and unlikely to ever
+ // occur, but the safe action is to indicate the variable is optimised out.
+ return None;
+
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second);
+ return MTracker->getSpillMLoc(SpillID);
}
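// Worked example, illustrative only: a folded 4-byte store produces a 32-bit
// memory operand, which selects the {32, 0} position seeded in the
// MLocTracker constructor. A width with no matching index (say, a 24-bit
// store on some unusual target) falls back to None, i.e. "optimised out".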
/// End all previous ranges related to @MI and start a new range from @MI
@@ -1759,6 +935,17 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
if (Scope == nullptr)
return true; // handled it; by doing nothing
+ // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow it to
+ // contribute to locations in this block, but don't propagate further.
+ // Interpret it like a DBG_VALUE $noreg.
+ if (MI.isDebugValueList()) {
+ if (VTracker)
+ VTracker->defVar(MI, Properties, None);
+ if (TTracker)
+ TTracker->redefVar(MI, Properties, None);
+ return true;
+ }
+
const MachineOperand &MO = MI.getOperand(0);
// MLocTracker needs to know that this register is read, even if it's only
@@ -1852,16 +1039,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
const MachineInstr &TargetInstr = *InstrIt->second.first;
uint64_t BlockNo = TargetInstr.getParent()->getNumber();
- // Pick out the designated operand.
- assert(OpNo < TargetInstr.getNumOperands());
- const MachineOperand &MO = TargetInstr.getOperand(OpNo);
-
- // Today, this can only be a register.
- assert(MO.isReg() && MO.isDef());
-
- unsigned LocID = MTracker->getLocID(MO.getReg(), false);
- LocIdx L = MTracker->LocIDToLocIdx[LocID];
- NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ // Pick out the designated operand. It might be a memory reference, if
+ // a register def was folded into a stack store.
+ if (OpNo == MachineFunction::DebugOperandMemNumber &&
+ TargetInstr.hasOneMemOperand()) {
+ Optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
+ if (L)
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
+ } else if (OpNo != MachineFunction::DebugOperandMemNumber) {
+ assert(OpNo < TargetInstr.getNumOperands());
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ // Today, this can only be a register.
+ assert(MO.isReg() && MO.isDef());
+
+ unsigned LocID = MTracker->getLocID(MO.getReg());
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+ // else: NewID is left as None.
} else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
// It's actually a PHI value. Which value it is might not be obvious, use
// the resolver helper to find out.
@@ -1957,7 +1153,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
Optional<LocIdx> FoundLoc = None;
for (auto Location : MTracker->locations()) {
LocIdx CurL = Location.Idx;
- ValueIDNum ID = MTracker->LocIdxToIDNum[CurL];
+ ValueIDNum ID = MTracker->readMLoc(CurL);
if (NewID && ID == NewID) {
// If this is the first location with that value, pick it. Otherwise,
// consider whether it's a "longer term" location.
@@ -2016,6 +1212,10 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
auto PHIRec = DebugPHIRecord(
{InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
DebugPHINumToValue.push_back(PHIRec);
+
+ // Ensure this register is tracked.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ MTracker->lookupOrTrackRegister(*RAI);
} else {
// The value is whatever's in this stack slot.
assert(MO.isFI());
@@ -2026,19 +1226,46 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
if (MFI->isDeadObjectIndex(FI))
return true;
- // Identify this spill slot.
+ // Identify this spill slot, ensure it's tracked.
Register Base;
StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
SpillLoc SL = {Base, Offs};
- Optional<ValueIDNum> Num = MTracker->readSpill(SL);
+ SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL);
+
+ // Problem: what value should we extract from the stack? LLVM does not
+ // record what size the last store to the slot was, and it would become
+ // sketchy after stack slot colouring anyway. Take a look at what values
+ // are stored on the stack, and pick the largest one that wasn't def'd
+ // by a spill (i.e., the value most likely to have been def'd in a register
+ // and then spilt).
+ std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
+ Optional<ValueIDNum> Result = None;
+ Optional<LocIdx> SpillLoc = None;
+ for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+ SpillLoc = MTracker->getSpillMLoc(SpillID);
+ ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
+ // If this value was defined in its own position, then it was probably
+ // an aliasing index of a small value that was spilt.
+ if (Val.getLoc() != SpillLoc->asU64()) {
+ Result = Val;
+ break;
+ }
+ }
- if (!Num)
- // Nothing ever writes to this slot. Curious, but nothing we can do.
- return true;
+ // If we didn't find anything, we're probably looking at a PHI, or a memory
+ // store folded into an instruction. FIXME: Take a guess that it's 64
+ // bits. This isn't ideal, but tracking the size that the spill is
+ // "supposed" to be is more complex, and benefits a small number of
+ // locations.
+ if (!Result) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0});
+ SpillLoc = MTracker->getSpillMLoc(SpillID);
+ Result = MTracker->readMLoc(*SpillLoc);
+ }
// Record this DBG_PHI for later analysis.
- auto DbgPHI = DebugPHIRecord(
- {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+ auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
DebugPHINumToValue.push_back(DbgPHI);
}
@@ -2061,10 +1288,6 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
} else if (MI.isMetaInstruction())
return;
- MachineFunction *MF = MI.getMF();
- const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- Register SP = TLI->getStackPointerRegisterToSaveRestore();
-
// Find the regs killed by MI, and find regmasks of preserved regs.
// Max out the number of statically allocated elements in `DeadRegs`, as this
// prevents fallback to std::set::count() operations.
@@ -2075,7 +1298,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
- !(MI.isCall() && MO.getReg() == SP)) {
+ !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
@@ -2093,6 +1316,16 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
for (auto *MO : RegMaskPtrs)
MTracker->writeRegMask(MO, CurBB, CurInst);
+ // If this instruction writes to a spill slot, def that slot.
+ if (hasFoldedStackStore(MI)) {
+ SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+ }
+ }
+
if (!TTracker)
return;
@@ -2118,32 +1351,27 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
}
+
+ // Tell TTracker about any folded stack store.
+ if (hasFoldedStackStore(MI)) {
+ SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ TTracker->clobberMloc(L, MI.getIterator(), true);
+ }
+ }
}
void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
- ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+ // In all circumstances, re-def all aliases. It's definitely a new value now.
+ for (MCRegAliasIterator RAI(DstRegNum, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+ ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
MTracker->setReg(DstRegNum, SrcValue);
- // In all circumstances, re-def the super registers. It's definitely a new
- // value now. This doesn't uniquely identify the composition of subregs, for
- // example, two identical values in subregisters composed in different
- // places would not get equal value numbers.
- for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI)
- MTracker->defReg(*SRI, CurBB, CurInst);
-
- // If we're emulating VarLocBasedImpl, just define all the subregisters.
- // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not
- // through prior copies.
- if (EmulateOldLDV) {
- for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI)
- MTracker->defReg(DRI.getSubReg(), CurBB, CurInst);
- return;
- }
-
- // Otherwise, actually copy subregisters from one location to another.
- // XXX: in addition, any subregisters of DstRegNum that don't line up with
- // the source register should be def'd.
+ // Copy subregisters from one location to another.
for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
unsigned SrcSubReg = SRI.getSubReg();
unsigned SubRegIdx = SRI.getSubRegIndex();
@@ -2154,15 +1382,13 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
// Do copy. There are two matching subregisters, the source value should
// have been def'd when the super-reg was, the latter might not be tracked
// yet.
- // This will force SrcSubReg to be tracked, if it isn't yet.
- (void)MTracker->readReg(SrcSubReg);
- LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg);
- assert(SrcL.asU64());
- (void)MTracker->readReg(DstSubReg);
- LocIdx DstL = MTracker->getRegMLoc(DstSubReg);
- assert(DstL.asU64());
+ // This will force SrcSubReg to be tracked, if it isn't yet. Will read
+ // mphi values if it wasn't tracked.
+ LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg);
+ LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg);
+ (void)SrcL;
(void)DstL;
- ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+ ValueIDNum CpyValue = MTracker->readReg(SrcSubReg);
MTracker->setReg(DstSubReg, CpyValue);
}
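// Illustrative example: for a copy of $rax to $rbx on x86-64, the loop above
// also forwards the tracked values of $eax, $ax and $al into $ebx, $bx and
// $bl, so values previously known to live in the sub-registers are also
// known to live in the corresponding sub-registers of the destination.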
@@ -2174,6 +1400,12 @@ bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
if (!MI.hasOneMemOperand())
return false;
+ // Reject any memory operand that's aliased -- we can't guarantee its value.
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ if (PVal->isAliased(MFI))
+ return false;
+
if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
return false; // This is not a spill instruction, since no valid size was
// returned from either function.
@@ -2191,7 +1423,7 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
return Reg != 0;
}
-Optional<SpillLoc>
+Optional<SpillLocationNo>
InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
if (!MI.hasOneMemOperand())
@@ -2213,84 +1445,117 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
if (EmulateOldLDV)
return false;
+ // Strictly limit ourselves to plain loads and stores, not all instructions
+ // that can access the stack.
+ int DummyFI = -1;
+ if (!TII->isStoreToStackSlotPostFE(MI, DummyFI) &&
+ !TII->isLoadFromStackSlotPostFE(MI, DummyFI))
+ return false;
+
MachineFunction *MF = MI.getMF();
unsigned Reg;
- Optional<SpillLoc> Loc;
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+ // Strictly limit ourselves to plain loads and stores, not all instructions
+ // that can access the stack.
+ int FIDummy;
+ if (!TII->isStoreToStackSlotPostFE(MI, FIDummy) &&
+ !TII->isLoadFromStackSlotPostFE(MI, FIDummy))
+ return false;
+
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, terminate that variable location. The value in memory
// will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
if (isSpillInstruction(MI, MF)) {
- Loc = extractSpillBaseRegAndOffset(MI);
-
- if (TTracker) {
- Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
- if (MLoc) {
- // Un-set this location before clobbering, so that we don't salvage
- // the variable location back to the same place.
- MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
+ SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
+
+ // Un-set this location and clobber, so that earlier locations don't
+ // continue past this store.
+ for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx);
+ Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+ if (!MLoc)
+ continue;
+
+ // We need to over-write the stack slot with something (here, a def at
+ // this instruction) to ensure no values are preserved in this stack slot
+ // after the spill. It also prevents TTracker from trying to recover the
+ // location and re-installing it in the same place.
+ ValueIDNum Def(CurBB, CurInst, *MLoc);
+ MTracker->setMLoc(*MLoc, Def);
+ if (TTracker)
TTracker->clobberMloc(*MLoc, MI.getIterator());
- }
}
}
// Try to recognise spill and restore instructions that may transfer a value.
if (isLocationSpill(MI, MF, Reg)) {
- Loc = extractSpillBaseRegAndOffset(MI);
- auto ValueID = MTracker->readReg(Reg);
+ SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
- // If the location is empty, produce a phi, signify it's the live-in value.
- if (ValueID.getLoc() == 0)
- ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+ auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
+ auto ReadValue = MTracker->readReg(SrcReg);
+ LocIdx DstLoc = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(DstLoc, ReadValue);
+
+ if (TTracker) {
+ LocIdx SrcLoc = MTracker->getRegMLoc(SrcReg);
+ TTracker->transferMlocs(SrcLoc, DstLoc, MI.getIterator());
+ }
+ };
- MTracker->setSpill(*Loc, ValueID);
- auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
- assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
- LocIdx SpillLocIdx = *OptSpillLocIdx;
+ // Then, transfer subreg bits.
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ // Ensure this reg is tracked.
+ (void)MTracker->lookupOrTrackRegister(*SRI);
+ unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
+ DoTransfer(*SRI, SpillID);
+ }
- // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
- if (TTracker)
- TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
- MI.getIterator());
+ // Directly look up the size of the main source reg, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
} else {
- if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg);
+ if (!OptLoc)
return false;
+ SpillLocationNo Loc = *OptLoc;
- // Is there a value to be restored?
- auto OptValueID = MTracker->readSpill(*Loc);
- if (OptValueID) {
- ValueIDNum ValueID = *OptValueID;
- LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
- // XXX -- can we recover sub-registers of this value? Until we can, first
- // overwrite all defs of the register being restored to.
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- MTracker->defReg(*RAI, CurBB, CurInst);
+ // Assumption: we're reading from the base of the stack slot, not some
+ // offset into it. It seems very unlikely LLVM would ever generate
+ // restores where this wasn't true. This then becomes a question of what
+ // subregisters in the destination register line up with positions in the
+ // stack slot.
- // Now override the reg we're restoring to.
- MTracker->setReg(Reg, ValueID);
+ // Def all registers that alias the destination.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Now find subregisters within the destination register, and load values
+ // from stack slot positions.
+ auto DoTransfer = [&](Register DestReg, unsigned SpillID) {
+ LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
+ auto ReadValue = MTracker->readMLoc(SrcIdx);
+ MTracker->setReg(DestReg, ReadValue);
+
+ if (TTracker) {
+ LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
+ TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
+ }
+ };
- // Report this restore to the transfer tracker too.
- if (TTracker)
- TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
- MI.getIterator());
- } else {
- // There isn't anything in the location; not clear if this is a code path
- // that still runs. Def this register anyway just in case.
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- MTracker->defReg(*RAI, CurBB, CurInst);
-
- // Force the spill slot to be tracked.
- LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
-
- // Set the restored value to be a machine phi number, signifying that it's
- // whatever the spills live-in value is in this block. Definitely has
- // a LocIdx due to the setSpill above.
- ValueIDNum ValueID = {CurBB, 0, L};
- MTracker->setReg(Reg, ValueID);
- MTracker->setSpill(*Loc, ValueID);
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SpillID = MTracker->getLocID(Loc, Subreg);
+ DoTransfer(*SRI, SpillID);
}
+
+ // Directly look up this register's slot idx by size, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
}
return true;
}
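// Illustrative walk-through: for a spill of $eax on x86-64, the code above
// copies the tracked values of the sub-registers ($ax, $al, $ah) into their
// matching {size, offset} positions of the slot, then the main 32-bit value
// into the {32, 0} position. A later restore defs every alias of the
// destination register and then reads the same positions back into the
// corresponding sub-registers.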
@@ -2510,12 +1775,11 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
}
// Compute a bitvector of all the registers that are tracked in this block.
- const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
- Register SP = TLI->getStackPointerRegisterToSaveRestore();
BitVector UsedRegs(TRI->getNumRegs());
for (auto Location : MTracker->locations()) {
unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
- if (ID >= TRI->getNumRegs() || ID == SP)
+ // Ignore stack slots, and aliases of the stack pointer.
+ if (ID >= TRI->getNumRegs() || MTracker->SPAliases.count(ID))
continue;
UsedRegs.set(ID);
}
@@ -2531,7 +1795,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
// they're all clobbered or at least set in the designated transfer
// elem.
for (unsigned Bit : BV.set_bits()) {
- unsigned ID = MTracker->getLocID(Bit, false);
+ unsigned ID = MTracker->getLocID(Bit);
LocIdx Idx = MTracker->LocIDToLocIdx[ID];
auto &TransferMap = MLocTransfer[I];
@@ -2553,23 +1817,20 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
}
}
-std::tuple<bool, bool>
-InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+bool InstrRefBasedLDV::mlocJoin(
+ MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
- bool DowngradeOccurred = false;
- // Collect predecessors that have been visited. Anything that hasn't been
- // visited yet is a backedge on the first iteration, and the meet of it's
- // lattice value for all locations will be unaffected.
+ // Handle value-propagation when control flow merges on entry to a block. For
+ // any location without a PHI already placed, the location has the same value
+ // as its predecessors. If a PHI is placed, test to see whether it's now a
+ // redundant PHI that we can eliminate.
+
SmallVector<const MachineBasicBlock *, 8> BlockOrders;
- for (auto Pred : MBB.predecessors()) {
- if (Visited.count(Pred)) {
- BlockOrders.push_back(Pred);
- }
- }
+ for (auto Pred : MBB.predecessors())
+ BlockOrders.push_back(Pred);
// Visit predecessors in RPOT order.
auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
@@ -2579,83 +1840,216 @@ InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
// Skip entry block.
if (BlockOrders.size() == 0)
- return std::tuple<bool, bool>(false, false);
+ return false;
- // Step through all machine locations, then look at each predecessor and
- // detect disagreements.
- unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
+ // Step through all machine locations, look at each predecessor and test
+ // whether we can eliminate redundant PHIs.
for (auto Location : MTracker->locations()) {
LocIdx Idx = Location.Idx;
+
// Pick out the first predecessors live-out value for this location. It's
- // guaranteed to be not a backedge, as we order by RPO.
- ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+ // guaranteed to not be a backedge, as we order by RPO.
+ ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+ // If we've already eliminated a PHI here, do no further checking, just
+ // propagate the first predecessor's live-out value into this block.
+ if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) {
+ if (InLocs[Idx.asU64()] != FirstVal) {
+ InLocs[Idx.asU64()] = FirstVal;
+ Changed |= true;
+ }
+ continue;
+ }
- // Some flags for whether there's a disagreement, and whether it's a
- // disagreement with a backedge or not.
+ // We're now examining a PHI to see whether it's unnecessary. Loop around
+ // the other live-in values and test whether they're all the same.
bool Disagree = false;
- bool NonBackEdgeDisagree = false;
-
- // Loop around everything that wasn't 'base'.
for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
- auto *MBB = BlockOrders[I];
- if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
- // Live-out of a predecessor disagrees with the first predecessor.
- Disagree = true;
-
- // Test whether it's a disagreemnt in the backedges or not.
- if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
- NonBackEdgeDisagree = true;
- }
- }
+ const MachineBasicBlock *PredMBB = BlockOrders[I];
+ const ValueIDNum &PredLiveOut =
+ OutLocs[PredMBB->getNumber()][Idx.asU64()];
- bool OverRide = false;
- if (Disagree && !NonBackEdgeDisagree) {
- // Only the backedges disagree. Consider demoting the livein
- // lattice value, as per the file level comment. The value we consider
- // demoting to is the value that the non-backedge predecessors agree on.
- // The order of values is that non-PHIs are \top, a PHI at this block
- // \bot, and phis between the two are ordered by their RPO number.
- // If there's no agreement, or we've already demoted to this PHI value
- // before, replace with a PHI value at this block.
-
- // Calculate order numbers: zero means normal def, nonzero means RPO
- // number.
- unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
- if (!BaseVal.isPHI())
- BaseBlockRPONum = 0;
-
- ValueIDNum &InLocID = InLocs[Idx.asU64()];
- unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
- if (!InLocID.isPHI())
- InLocRPONum = 0;
-
- // Should we ignore the disagreeing backedges, and override with the
- // value the other predecessors agree on (in "base")?
- unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
- if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
- // Override.
- OverRide = true;
- DowngradeOccurred = true;
- }
+ // Incoming values agree, continue trying to eliminate this PHI.
+ if (FirstVal == PredLiveOut)
+ continue;
+
+ // We can also accept a PHI value that feeds back into itself.
+ if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx))
+ continue;
+
+ // Live-out of a predecessor disagrees with the first predecessor.
+ Disagree = true;
}
- // else: if we disagree in the non-backedges, then this is definitely
- // a control flow merge where different values merge. Make it a PHI.
- // Generate a phi...
- ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
- ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
- if (InLocs[Idx.asU64()] != NewVal) {
+ // No disagreement? No PHI. Otherwise, leave the PHI in live-ins.
+ if (!Disagree) {
+ InLocs[Idx.asU64()] = FirstVal;
Changed |= true;
- InLocs[Idx.asU64()] = NewVal;
}
}
// TODO: Reimplement NumInserted and NumRemoved.
- return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+ return Changed;
+}
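// Worked example of the join rule above, purely illustrative: if one arm of a
// diamond spills and restores $rax while the other leaves it alone, both
// predecessors flow the same ValueIDNum for $rax into the join block, so the
// speculatively placed PHI there is found to be redundant and the live-in
// collapses to that common value. Had the arms produced genuinely different
// values, Disagree would stay set and the PHI would be kept.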
+
+void InstrRefBasedLDV::findStackIndexInterference(
+ SmallVectorImpl<unsigned> &Slots) {
+ // We could spend a bit of time finding the exact, minimal, set of stack
+ // indexes that interfere with each other, much like reg units. Or, we can
+ // rely on the fact that:
+ // * The smallest / lowest index will interfere with everything at zero
+ // offset, which will be the largest set of registers,
+ // * Most indexes with non-zero offset will end up being interference units
+ // anyway.
+ // So just pick those out and return them.
+
+ // We can rely on a single-byte stack index existing already, because we
+ // initialize them in MLocTracker.
+ auto It = MTracker->StackSlotIdxes.find({8, 0});
+ assert(It != MTracker->StackSlotIdxes.end());
+ Slots.push_back(It->second);
+
+ // Find anything that has a non-zero offset and add that too.
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ // Is offset zero? If so, ignore.
+ if (!Pair.first.second)
+ continue;
+ Slots.push_back(Pair.second);
+ }
}
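// Example, illustrative only: with tracked positions {8,0}, {16,0}, {32,0},
// {64,0} and a single sub-register position {32,32}, the returned "units" are
// the {8,0} index and the {32,32} index; every other zero-offset position
// already overlaps the single-byte index at offset zero.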
-void InstrRefBasedLDV::mlocDataflow(
- ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+void InstrRefBasedLDV::placeMLocPHIs(
+ MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ SmallVector<unsigned, 4> StackUnits;
+ findStackIndexInterference(StackUnits);
+
+ // To avoid repeatedly running the PHI placement algorithm, leverage the
+ // fact that a def of a register MUST also def its register units. Find the
+ // units for registers, place PHIs for them, and then replicate them for
+ // aliasing registers. Some inputs that are never def'd (DBG_PHIs of
+ // arguments) don't lead to register units being tracked, just place PHIs for
+ // those registers directly. Stack slots have their own form of "unit";
+ // store them to one side.
+ SmallSet<Register, 32> RegUnitsToPHIUp;
+ SmallSet<LocIdx, 32> NormalLocsToPHI;
+ SmallSet<SpillLocationNo, 32> StackSlots;
+ for (auto Location : MTracker->locations()) {
+ LocIdx L = Location.Idx;
+ if (MTracker->isSpill(L)) {
+ StackSlots.insert(MTracker->locIDToSpill(MTracker->LocIdxToLocID[L]));
+ continue;
+ }
+
+ Register R = MTracker->LocIdxToLocID[L];
+ SmallSet<Register, 8> FoundRegUnits;
+ bool AnyIllegal = false;
+ for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) {
+ for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot){
+ if (!MTracker->isRegisterTracked(*URoot)) {
+ // Not all roots were loaded into the tracking map: this register
+ // isn't actually def'd anywhere, we only read from it. Generate PHIs
+ // for this reg, but don't iterate units.
+ AnyIllegal = true;
+ } else {
+ FoundRegUnits.insert(*URoot);
+ }
+ }
+ }
+
+ if (AnyIllegal) {
+ NormalLocsToPHI.insert(L);
+ continue;
+ }
+
+ RegUnitsToPHIUp.insert(FoundRegUnits.begin(), FoundRegUnits.end());
+ }
+
+ // Lambda to fetch PHIs for a given location, and write into the PHIBlocks
+ // collection.
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
+ auto CollectPHIsForLoc = [&](LocIdx L) {
+ // Collect the set of defs.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ MachineBasicBlock *MBB = OrderToBB[I];
+ const auto &TransferFunc = MLocTransfer[MBB->getNumber()];
+ if (TransferFunc.find(L) != TransferFunc.end())
+ DefBlocks.insert(MBB);
+ }
+
+ // The entry block defs the location too: it's the live-in / argument value.
+ // Only insert if there are other defs though; everything is trivially live
+ // through otherwise.
+ if (!DefBlocks.empty())
+ DefBlocks.insert(&*MF.begin());
+
+ // Ask the SSA construction algorithm where we should put PHIs. Clear
+ // anything that might have been hanging around from earlier.
+ PHIBlocks.clear();
+ BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks);
+ };
+
+ auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) {
+ for (const MachineBasicBlock *MBB : PHIBlocks)
+ MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L);
+ };
+
+ // For locations with no reg units, just place PHIs.
+ for (LocIdx L : NormalLocsToPHI) {
+ CollectPHIsForLoc(L);
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+ }
+
+ // For stack slots, calculate PHIs for the equivalent of the units, then
+ // install for each index.
+ for (SpillLocationNo Slot : StackSlots) {
+ for (unsigned Idx : StackUnits) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Slot, Idx);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ CollectPHIsForLoc(L);
+ InstallPHIsAtLoc(L);
+
+ // Find anything that aliases this stack index, install PHIs for it too.
+ unsigned Size, Offset;
+ std::tie(Size, Offset) = MTracker->StackIdxesToPos[Idx];
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ unsigned ThisSize, ThisOffset;
+ std::tie(ThisSize, ThisOffset) = Pair.first;
+ if (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset)
+ continue;
+
+ unsigned ThisID = MTracker->getSpillIDWithIdx(Slot, Pair.second);
+ LocIdx ThisL = MTracker->getSpillMLoc(ThisID);
+ InstallPHIsAtLoc(ThisL);
+ }
+ }
+ }
+
+ // For reg units, place PHIs, and then place them for any aliasing registers.
+ for (Register R : RegUnitsToPHIUp) {
+ LocIdx L = MTracker->lookupOrTrackRegister(R);
+ CollectPHIsForLoc(L);
+
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+
+ // Now find aliases and install PHIs for those.
+ for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) {
+ // Super-registers that are "above" the largest register read/written by
+ // the function will alias, but will not be tracked.
+ if (!MTracker->isRegisterTracked(*RAI))
+ continue;
+
+ LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI);
+ InstallPHIsAtLoc(AliasLoc);
+ }
+ }
+}
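// Note on the aliasing test above, for illustration: the condition
// (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset) is the
// usual interval-disjointness check. For the {8,0} unit it pulls in every
// zero-offset position; for a {32,32} unit it pulls in {64,0} but not {16,0}.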
+
+void InstrRefBasedLDV::buildMLocValueMap(
+ MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
std::priority_queue<unsigned int, std::vector<unsigned int>,
std::greater<unsigned int>>
@@ -2666,20 +2060,34 @@ void InstrRefBasedLDV::mlocDataflow(
// but this is probably not worth it.
SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
- // Initialize worklist with every block to be visited.
+ // Initialize worklist with every block to be visited. Also produce list of
+ // all blocks.
+ SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
Worklist.push(I);
OnWorklist.insert(OrderToBB[I]);
+ AllBlocks.insert(OrderToBB[I]);
}
- MTracker->reset();
-
- // Set inlocs for entry block -- each as a PHI at the entry block. Represents
- // the incoming value to the function.
- MTracker->setMPhis(0);
+ // Initialize entry block to PHIs. These represent arguments.
for (auto Location : MTracker->locations())
- MInLocs[0][Location.Idx.asU64()] = Location.Value;
+ MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+ MTracker->reset();
+
+ // Start by placing PHIs, using the usual SSA constructor algorithm. Consider
+ // any machine-location that isn't live-through a block to be def'd in that
+ // block.
+ placeMLocPHIs(MF, AllBlocks, MInLocs, MLocTransfer);
+
+ // Propagate values to eliminate redundant PHIs. At the same time, this
+ // produces the table of Block x Location => Value for the entry to each
+ // block.
+ // The kind of PHI we can eliminate is, for example, one where a path through
+ // a conditional spills and restores a register, and the register still has
+ // the same value once control flow joins, unbeknownst to the PHI placement
+ // code. Propagating values allows us to identify such unnecessary PHIs and
+ // remove them.
SmallPtrSet<const MachineBasicBlock *, 16> Visited;
while (!Worklist.empty() || !Pending.empty()) {
// Vector for storing the evaluated block transfer function.
@@ -2691,16 +2099,10 @@ void InstrRefBasedLDV::mlocDataflow(
Worklist.pop();
// Join the values in all predecessor blocks.
- bool InLocsChanged, DowngradeOccurred;
- std::tie(InLocsChanged, DowngradeOccurred) =
- mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ bool InLocsChanged;
+ InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
InLocsChanged |= Visited.insert(MBB).second;
- // If a downgrade occurred, book us in for re-examination on the next
- // iteration.
- if (DowngradeOccurred && OnPending.insert(MBB).second)
- Pending.push(BBToOrder[MBB]);
-
// Don't examine transfer function if we've visited this loc at least
// once, and inlocs haven't changed.
if (!InLocsChanged)
@@ -2715,7 +2117,7 @@ void InstrRefBasedLDV::mlocDataflow(
for (auto &P : MLocTransfer[CurBB]) {
if (P.second.getBlock() == CurBB && P.second.isPHI()) {
// This is a movement of whatever was live in. Read it.
- ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc());
+ ValueIDNum NewID = MTracker->readMLoc(P.second.getLoc());
ToRemap.push_back(std::make_pair(P.first, NewID));
} else {
// It's a def. Just set it.
@@ -2745,8 +2147,8 @@ void InstrRefBasedLDV::mlocDataflow(
continue;
// All successors should be visited: put any back-edges on the pending
- // list for the next dataflow iteration, and any other successors to be
- // visited this iteration, if they're not going to be already.
+ // list for the next pass-through, and any other successors to be
+ // visited this pass, if they're not going to be already.
for (auto s : MBB->successors()) {
// Does branching to this successor represent a back-edge?
if (BBToOrder[s] > BBToOrder[MBB]) {
@@ -2769,170 +2171,169 @@ void InstrRefBasedLDV::mlocDataflow(
assert(Pending.empty() && "Pending should be empty");
}
- // Once all the live-ins don't change on mlocJoin(), we've reached a
- // fixedpoint.
+ // Once all the live-ins don't change on mlocJoin(), we've eliminated all
+ // redundant PHIs.
}
-bool InstrRefBasedLDV::vlocDowngradeLattice(
- const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation,
- const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) {
- // Ranking value preference: see file level comment, the highest rank is
- // a plain def, followed by PHI values in reverse post-order. Numerically,
- // we assign all defs the rank '0', all PHIs their blocks RPO number plus
- // one, and consider the lowest value the highest ranked.
- int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1;
- if (!OldLiveInLocation.ID.isPHI())
- OldLiveInRank = 0;
-
- // Allow any unresolvable conflict to be over-ridden.
- if (OldLiveInLocation.Kind == DbgValue::NoVal) {
- // Although if it was an unresolvable conflict from _this_ block, then
- // all other seeking of downgrades and PHIs must have failed before hand.
- if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber())
- return false;
- OldLiveInRank = INT_MIN;
- }
-
- auto &InValue = *Values[0].second;
+// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide
+// template specialisations for graph traits and a successor enumerator.
+namespace llvm {
+template <> struct GraphTraits<MachineBasicBlock> {
+ using NodeRef = MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::succ_iterator;
- if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal)
- return false;
+ static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
- unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()];
- int ThisRank = ThisRPO + 1;
- if (!InValue.ID.isPHI())
- ThisRank = 0;
+template <> struct GraphTraits<const MachineBasicBlock> {
+ using NodeRef = const MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
- // Too far down the lattice?
- if (ThisRPO >= CurBlockRPONum)
- return false;
+ static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
- // Higher in the lattice than what we've already explored?
- if (ThisRank <= OldLiveInRank)
- return false;
+using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType;
+using MachineDomTreeChildGetter =
+ typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>;
- return true;
+namespace IDFCalculatorDetail {
+template <>
+typename MachineDomTreeChildGetter::ChildrenTy
+MachineDomTreeChildGetter::get(const NodeRef &N) {
+ return {N->succ_begin(), N->succ_end()};
+}
+} // namespace IDFCalculatorDetail
+} // namespace llvm
+
+void InstrRefBasedLDV::BlockPHIPlacement(
+ const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) {
+ // Apply IDF calculator to the designated set of location defs, storing
+ // required PHIs into PHIBlocks. Uses the dominator tree stored in the
+ // InstrRefBasedLDV object.
+ IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo;
+ IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo);
+
+ IDF.setLiveInBlocks(AllBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ IDF.calculate(PHIBlocks);
}
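BlockPHIPlacement defers the actual placement work to LLVM's generic IDFCalculatorBase, driven by the GraphTraits and ChildrenGetterTy specialisations above. For readers unfamiliar with the technique, here is a minimal, self-contained sketch (not LLVM code) of iterated-dominance-frontier PHI placement on an invented diamond CFG; the block numbering, immediate-dominator table, and def set are all hypothetical.

// Standalone sketch of iterated-dominance-frontier PHI placement on a
// hard-coded diamond CFG: 0 -> {1,2}, 1 -> 3, 2 -> 3. Blocks 1, 2 and 3 are
// all immediately dominated by 0. A def in block 1 forces a PHI at the join, 3.
#include <cstdio>
#include <set>
#include <vector>

int main() {
  const int NumBlocks = 4;
  // Immediate dominators; block 0 is the entry and its own idom.
  int IDom[NumBlocks] = {0, 0, 0, 0};
  // Predecessor lists for each block.
  std::vector<std::vector<int>> Preds = {{}, {0}, {0}, {1, 2}};

  // Cooper-Harvey-Kennedy: for each join block B, walk up from each
  // predecessor to IDom(B), adding B to the dominance frontier of every
  // block visited on the way.
  std::vector<std::set<int>> DF(NumBlocks);
  for (int B = 0; B < NumBlocks; ++B) {
    if (Preds[B].size() < 2)
      continue;
    for (int P : Preds[B]) {
      int Runner = P;
      while (Runner != IDom[B]) {
        DF[Runner].insert(B);
        Runner = IDom[Runner];
      }
    }
  }

  // Iterate the frontier over the def blocks until no new PHI blocks appear.
  std::set<int> DefBlocks = {1};
  std::set<int> PHIBlocks;
  std::vector<int> Worklist(DefBlocks.begin(), DefBlocks.end());
  while (!Worklist.empty()) {
    int B = Worklist.back();
    Worklist.pop_back();
    for (int F : DF[B])
      if (PHIBlocks.insert(F).second)
        Worklist.push_back(F);
  }

  for (int B : PHIBlocks)
    std::printf("PHI needed at block %d\n", B); // Prints: block 3.
  return 0;
}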
-std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc(
- MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) {
+Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
+ const MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
// Collect a set of locations from predecessor where its live-out value can
// be found.
SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ SmallVector<const DbgValueProperties *, 4> Properties;
unsigned NumLocs = MTracker->getNumLocs();
- unsigned BackEdgesStart = 0;
- for (auto p : BlockOrders) {
- // Pick out where backedges start in the list of predecessors. Relies on
- // BlockOrders being sorted by RPO.
- if (BBToOrder[p] < BBToOrder[&MBB])
- ++BackEdgesStart;
+ // No predecessors means no PHIs.
+ if (BlockOrders.empty())
+ return None;
- // For each predecessor, create a new set of locations.
- Locs.resize(Locs.size() + 1);
+ for (auto p : BlockOrders) {
unsigned ThisBBNum = p->getNumber();
- auto LiveOutMap = LiveOuts.find(p);
- if (LiveOutMap == LiveOuts.end())
- // This predecessor isn't in scope, it must have no live-in/live-out
- // locations.
- continue;
-
- auto It = LiveOutMap->second->find(Var);
- if (It == LiveOutMap->second->end())
- // There's no value recorded for this variable in this predecessor,
- // leave an empty set of locations.
- continue;
-
- const DbgValue &OutVal = It->second;
+ auto OutValIt = LiveOuts.find(p);
+ if (OutValIt == LiveOuts.end())
+ // If we have a predecessor not in scope, we'll never find a PHI position.
+ return None;
+ const DbgValue &OutVal = *OutValIt->second;
if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
// Consts and no-values cannot have locations we can join on.
- continue;
+ return None;
- assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def);
- ValueIDNum ValToLookFor = OutVal.ID;
+ Properties.push_back(&OutVal.Properties);
+
+ // Create new empty vector of locations.
+ Locs.resize(Locs.size() + 1);
- // Search the live-outs of the predecessor for the specified value.
- for (unsigned int I = 0; I < NumLocs; ++I) {
- if (MOutLocs[ThisBBNum][I] == ValToLookFor)
- Locs.back().push_back(LocIdx(I));
+ // If the live-in value is a def, find the locations where that value is
+ // present. Do the same for VPHIs where we know the VPHI value.
+ if (OutVal.Kind == DbgValue::Def ||
+ (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() &&
+ OutVal.ID != ValueIDNum::EmptyValue)) {
+ ValueIDNum ValToLookFor = OutVal.ID;
+ // Search the live-outs of the predecessor for the specified value.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ Locs.back().push_back(LocIdx(I));
+ }
+ } else {
+ assert(OutVal.Kind == DbgValue::VPHI);
+ // For VPHIs where we don't know the location, we definitely can't find
+ // a join loc.
+ if (OutVal.BlockNo != MBB.getNumber())
+ return None;
+
+ // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e.
+ // a value that's live-through the whole loop. (It has to be a backedge,
+ // because a block can't dominate itself). We can accept as a PHI location
+ // any location where the other predecessors agree, _and_ the machine
+ // locations feed back into themselves. Therefore, add all self-looping
+ // machine-value PHI locations.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I));
+ if (MOutLocs[ThisBBNum][I] == MPHI)
+ Locs.back().push_back(LocIdx(I));
+ }
}
}
- // If there were no locations at all, return an empty result.
- if (Locs.empty())
- return std::tuple<Optional<ValueIDNum>, bool>(None, false);
-
- // Lambda for seeking a common location within a range of location-sets.
- using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator;
- auto SeekLocation =
- [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> {
- // Starting with the first set of locations, take the intersection with
- // subsequent sets.
- SmallVector<LocIdx, 4> base = Locs[0];
- for (auto &S : SearchRange) {
- SmallVector<LocIdx, 4> new_base;
- std::set_intersection(base.begin(), base.end(), S.begin(), S.end(),
- std::inserter(new_base, new_base.begin()));
- base = new_base;
- }
- if (base.empty())
- return None;
+ // We should have found locations for all predecessors, or returned.
+ assert(Locs.size() == BlockOrders.size());
- // We now have a set of LocIdxes that contain the right output value in
- // each of the predecessors. Pick the lowest; if there's a register loc,
- // that'll be it.
- return *base.begin();
- };
+ // Check that all properties are the same. We can't pick a location if they're
+ // not.
+ const DbgValueProperties *Properties0 = Properties[0];
+ for (auto *Prop : Properties)
+ if (*Prop != *Properties0)
+ return None;
- // Search for a common location for all predecessors. If we can't, then fall
- // back to only finding a common location between non-backedge predecessors.
- bool ValidForAllLocs = true;
- auto TheLoc = SeekLocation(Locs);
- if (!TheLoc) {
- ValidForAllLocs = false;
- TheLoc =
- SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart));
- }
+ // Starting with the first set of locations, take the intersection with
+ // subsequent sets.
+ SmallVector<LocIdx, 4> CandidateLocs = Locs[0];
+ for (unsigned int I = 1; I < Locs.size(); ++I) {
+ auto &LocVec = Locs[I];
+ SmallVector<LocIdx, 4> NewCandidates;
+ std::set_intersection(CandidateLocs.begin(), CandidateLocs.end(),
+ LocVec.begin(), LocVec.end(), std::inserter(NewCandidates, NewCandidates.begin()));
+ CandidateLocs = NewCandidates;
+ }
+ if (CandidateLocs.empty())
+ return None;
- if (!TheLoc)
- return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+ // We now have a set of LocIdxes that contain the right output value in
+ // each of the predecessors. Pick the lowest; if there's a register loc,
+ // that'll be it.
+ LocIdx L = *CandidateLocs.begin();
// Return a PHI-value-number for the found location.
- LocIdx L = *TheLoc;
ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
- return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs);
+ return PHIVal;
}
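The heart of pickVPHILoc is the intersection step: each in-scope predecessor contributes the set of machine locations holding the value it feeds in, and a usable PHI location must survive intersection across all of them, with the lowest LocIdx preferred. A rough standalone sketch of that step, using plain standard-library containers and invented location numbers:

// Minimal sketch of the location-intersection step in pickVPHILoc: each
// predecessor contributes the (sorted) set of location indices holding the
// value it passes in; a usable PHI location must appear in every set, and the
// lowest surviving index is preferred. All numbers here are invented.
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

int main() {
  // Candidate locations per predecessor, sorted ascending (as LocIdx would be).
  std::vector<std::vector<unsigned>> Locs = {
      {1, 4, 7}, // pred A: value lives in locs 1, 4 and 7
      {1, 7},    // pred B: value lives in locs 1 and 7
      {1, 4, 9}, // pred C: value lives in locs 1, 4 and 9
  };

  std::vector<unsigned> Candidates = Locs[0];
  for (unsigned I = 1; I < Locs.size(); ++I) {
    std::vector<unsigned> Next;
    std::set_intersection(Candidates.begin(), Candidates.end(),
                          Locs[I].begin(), Locs[I].end(),
                          std::back_inserter(Next));
    Candidates = Next;
  }

  if (Candidates.empty())
    std::puts("no common location -> no VPHI location");
  else
    std::printf("PHI location: loc %u\n", Candidates.front()); // loc 1
  return 0;
}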
-std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
- MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
- SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum,
- const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
+bool InstrRefBasedLDV::vlocJoin(
+ MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
- DenseMap<DebugVariable, DbgValue> &InLocsT) {
- bool DowngradeOccurred = false;
-
+ DbgValue &LiveIn) {
// To emulate VarLocBasedImpl, process this block if it's not in scope but
// _does_ assign a variable value. No live-ins for this scope are transferred
// in though, so we can return immediately.
- if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) {
- if (VLOCVisited)
- return std::tuple<bool, bool>(true, false);
- return std::tuple<bool, bool>(false, false);
- }
+ if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB))
+ return false;
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
- // Find any live-ins computed in a prior iteration.
- auto ILSIt = VLOCInLocs.find(&MBB);
- assert(ILSIt != VLOCInLocs.end());
- auto &ILS = *ILSIt->second;
-
// Order predecessors by RPOT order, for exploring them in that order.
SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors());
@@ -2944,244 +2345,102 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
unsigned CurBlockRPONum = BBToOrder[&MBB];
- // Force a re-visit to loop heads in the first dataflow iteration.
- // FIXME: if we could "propose" Const values this wouldn't be needed,
- // because they'd need to be confirmed before being emitted.
- if (!BlockOrders.empty() &&
- BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum &&
- VLOCVisited)
- DowngradeOccurred = true;
-
- auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) {
- auto Result = InLocsT.insert(std::make_pair(DV, VR));
- (void)Result;
- assert(Result.second);
- };
-
- auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var, const DbgValueProperties &Properties) {
- DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal);
-
- ConfirmValue(Var, NoLocPHIVal);
- };
+ // Collect all the incoming DbgValues for this variable, from predecessor
+ // live-out values.
+ SmallVector<InValueT, 8> Values;
+ bool Bail = false;
+ int BackEdgesStart = 0;
+ for (auto p : BlockOrders) {
+ // If the predecessor isn't in scope / to be explored, we'll never be
+ // able to join any locations.
+ if (!BlocksToExplore.contains(p)) {
+ Bail = true;
+ break;
+ }
- // Attempt to join the values for each variable.
- for (auto &Var : AllVars) {
- // Collect all the DbgValues for this variable.
- SmallVector<InValueT, 8> Values;
- bool Bail = false;
- unsigned BackEdgesStart = 0;
- for (auto p : BlockOrders) {
- // If the predecessor isn't in scope / to be explored, we'll never be
- // able to join any locations.
- if (!BlocksToExplore.contains(p)) {
- Bail = true;
- break;
- }
+ // All Live-outs will have been initialized.
+ DbgValue &OutLoc = *VLOCOutLocs.find(p)->second;
- // Don't attempt to handle unvisited predecessors: they're implicitly
- // "unknown"s in the lattice.
- if (VLOCVisited && !VLOCVisited->count(p))
- continue;
+ // Keep track of where back-edges begin in the Values vector. Relies on
+ // BlockOrders being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[p];
+ if (ThisBBRPONum < CurBlockRPONum)
+ ++BackEdgesStart;
- // If the predecessors OutLocs is absent, there's not much we can do.
- auto OL = VLOCOutLocs.find(p);
- if (OL == VLOCOutLocs.end()) {
- Bail = true;
- break;
- }
+ Values.push_back(std::make_pair(p, &OutLoc));
+ }
- // No live-out value for this predecessor also means we can't produce
- // a joined value.
- auto VIt = OL->second->find(Var);
- if (VIt == OL->second->end()) {
- Bail = true;
- break;
- }
+ // If there were no values, or one of the predecessors couldn't have a
+ // value, then give up immediately. It's not safe to produce a live-in
+ // value. Leave as whatever it was before.
+ if (Bail || Values.size() == 0)
+ return false;
- // Keep track of where back-edges begin in the Values vector. Relies on
- // BlockOrders being sorted by RPO.
- unsigned ThisBBRPONum = BBToOrder[p];
- if (ThisBBRPONum < CurBlockRPONum)
- ++BackEdgesStart;
+ // All (non-entry) blocks have at least one non-backedge predecessor.
+ // Pick the variable value from the first of these, to compare against
+ // all others.
+ const DbgValue &FirstVal = *Values[0].second;
+
+ // If the old live-in value is not a PHI then either a) no PHI is needed
+ // here, or b) we eliminated the PHI that was here. If so, we can just
+ // propagate in the first parent's incoming value.
+ if (LiveIn.Kind != DbgValue::VPHI || LiveIn.BlockNo != MBB.getNumber()) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ }
+
+ // Scan for variable values that can never be resolved: if they have
+ // different DIExpressions, different indirectness, or are mixed constants /
+ // non-constants.
+ for (auto &V : Values) {
+ if (V.second->Properties != FirstVal.Properties)
+ return false;
+ if (V.second->Kind == DbgValue::NoVal)
+ return false;
+ if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
+ return false;
+ }
- Values.push_back(std::make_pair(p, &VIt->second));
- }
+ // Try to eliminate this PHI. Do the incoming values all agree?
+ bool Disagree = false;
+ for (auto &V : Values) {
+ if (*V.second == FirstVal)
+ continue; // No disagreement.
- // If there were no values, or one of the predecessors couldn't have a
- // value, then give up immediately. It's not safe to produce a live-in
- // value.
- if (Bail || Values.size() == 0)
+ // Eliminate if a backedge feeds a VPHI back into itself.
+ if (V.second->Kind == DbgValue::VPHI &&
+ V.second->BlockNo == MBB.getNumber() &&
+ // Is this a backedge?
+ std::distance(Values.begin(), &V) >= BackEdgesStart)
continue;
- // Enumeration identifying the current state of the predecessors values.
- enum {
- Unset = 0,
- Agreed, // All preds agree on the variable value.
- PropDisagree, // All preds agree, but the value kind is Proposed in some.
- BEDisagree, // Only back-edges disagree on variable value.
- PHINeeded, // Non-back-edge predecessors have conflicing values.
- NoSolution // Conflicting Value metadata makes solution impossible.
- } OurState = Unset;
-
- // All (non-entry) blocks have at least one non-backedge predecessor.
- // Pick the variable value from the first of these, to compare against
- // all others.
- const DbgValue &FirstVal = *Values[0].second;
- const ValueIDNum &FirstID = FirstVal.ID;
-
- // Scan for variable values that can't be resolved: if they have different
- // DIExpressions, different indirectness, or are mixed constants /
- // non-constants.
- for (auto &V : Values) {
- if (V.second->Properties != FirstVal.Properties)
- OurState = NoSolution;
- if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
- OurState = NoSolution;
- }
-
- // Flags diagnosing _how_ the values disagree.
- bool NonBackEdgeDisagree = false;
- bool DisagreeOnPHINess = false;
- bool IDDisagree = false;
- bool Disagree = false;
- if (OurState == Unset) {
- for (auto &V : Values) {
- if (*V.second == FirstVal)
- continue; // No disagreement.
-
- Disagree = true;
-
- // Flag whether the value number actually diagrees.
- if (V.second->ID != FirstID)
- IDDisagree = true;
-
- // Distinguish whether disagreement happens in backedges or not.
- // Relies on Values (and BlockOrders) being sorted by RPO.
- unsigned ThisBBRPONum = BBToOrder[V.first];
- if (ThisBBRPONum < CurBlockRPONum)
- NonBackEdgeDisagree = true;
-
- // Is there a difference in whether the value is definite or only
- // proposed?
- if (V.second->Kind != FirstVal.Kind &&
- (V.second->Kind == DbgValue::Proposed ||
- V.second->Kind == DbgValue::Def) &&
- (FirstVal.Kind == DbgValue::Proposed ||
- FirstVal.Kind == DbgValue::Def))
- DisagreeOnPHINess = true;
- }
-
- // Collect those flags together and determine an overall state for
- // what extend the predecessors agree on a live-in value.
- if (!Disagree)
- OurState = Agreed;
- else if (!IDDisagree && DisagreeOnPHINess)
- OurState = PropDisagree;
- else if (!NonBackEdgeDisagree)
- OurState = BEDisagree;
- else
- OurState = PHINeeded;
- }
-
- // An extra indicator: if we only disagree on whether the value is a
- // Def, or proposed, then also flag whether that disagreement happens
- // in backedges only.
- bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess &&
- !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def;
-
- const auto &Properties = FirstVal.Properties;
-
- auto OldLiveInIt = ILS.find(Var);
- const DbgValue *OldLiveInLocation =
- (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr;
-
- bool OverRide = false;
- if (OurState == BEDisagree && OldLiveInLocation) {
- // Only backedges disagree: we can consider downgrading. If there was a
- // previous live-in value, use it to work out whether the current
- // incoming value represents a lattice downgrade or not.
- OverRide =
- vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum);
- }
-
- // Use the current state of predecessor agreement and other flags to work
- // out what to do next. Possibilities include:
- // * Accept a value all predecessors agree on, or accept one that
- // represents a step down the exploration lattice,
- // * Use a PHI value number, if one can be found,
- // * Propose a PHI value number, and see if it gets confirmed later,
- // * Emit a 'NoVal' value, indicating we couldn't resolve anything.
- if (OurState == Agreed) {
- // Easiest solution: all predecessors agree on the variable value.
- ConfirmValue(Var, FirstVal);
- } else if (OurState == BEDisagree && OverRide) {
- // Only backedges disagree, and the other predecessors have produced
- // a new live-in value further down the exploration lattice.
- DowngradeOccurred = true;
- ConfirmValue(Var, FirstVal);
- } else if (OurState == PropDisagree) {
- // Predecessors agree on value, but some say it's only a proposed value.
- // Propagate it as proposed: unless it was proposed in this block, in
- // which case we're able to confirm the value.
- if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) {
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
- } else if (PropOnlyInBEs) {
- // If only backedges disagree, a higher (in RPO) block confirmed this
- // location, and we need to propagate it into this loop.
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
- } else {
- // Otherwise; a Def meeting a Proposed is still a Proposed.
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed));
- }
- } else if ((OurState == PHINeeded || OurState == BEDisagree)) {
- // Predecessors disagree and can't be downgraded: this can only be
- // solved with a PHI. Use pickVPHILoc to go look for one.
- Optional<ValueIDNum> VPHI;
- bool AllEdgesVPHI = false;
- std::tie(VPHI, AllEdgesVPHI) =
- pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders);
-
- if (VPHI && AllEdgesVPHI) {
- // There's a PHI value that's valid for all predecessors -- we can use
- // it. If any of the non-backedge predecessors have proposed values
- // though, this PHI is also only proposed, until the predecessors are
- // confirmed.
- DbgValue::KindT K = DbgValue::Def;
- for (unsigned int I = 0; I < BackEdgesStart; ++I)
- if (Values[I].second->Kind == DbgValue::Proposed)
- K = DbgValue::Proposed;
-
- ConfirmValue(Var, DbgValue(*VPHI, Properties, K));
- } else if (VPHI) {
- // There's a PHI value, but it's only legal for backedges. Leave this
- // as a proposed PHI value: it might come back on the backedges,
- // and allow us to confirm it in the future.
- DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed);
- ConfirmValue(Var, NoBEValue);
- } else {
- ConfirmNoVal(Var, Properties);
- }
- } else {
- // Otherwise: we don't know. Emit a "phi but no real loc" phi.
- ConfirmNoVal(Var, Properties);
- }
+ Disagree = true;
}
- // Store newly calculated in-locs into VLOCInLocs, if they've changed.
- Changed = ILS != InLocsT;
- if (Changed)
- ILS = InLocsT;
-
- return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+ // No disagreement -> live-through value.
+ if (!Disagree) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ } else {
+ // Otherwise use a VPHI.
+ DbgValue VPHI(MBB.getNumber(), FirstVal.Properties, DbgValue::VPHI);
+ Changed = LiveIn != VPHI;
+ if (Changed)
+ LiveIn = VPHI;
+ return Changed;
+ }
}
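vlocJoin now performs only PHI elimination: it keeps the block's VPHI when predecessors genuinely disagree, and collapses it to the common value otherwise, ignoring disagreement that is just the block's own PHI value arriving back around a loop. A deliberately simplified sketch of that decision, with plain integers standing in for DbgValues (the encoding and the numbers are invented, and the real pass additionally checks properties and that the self-PHI arrives via a backedge):

// Simplified sketch of the vlocJoin decision, with ints standing in for
// DbgValues. A live-in is either the single value all predecessors agree on,
// or a PHI placed in this block.
#include <cstdio>
#include <vector>

// Encode "PHI placed in block B" as a negative number, real values as >= 0.
static int phiFor(int Block) { return -(Block + 1); }

static int join(int ThisBlock, const std::vector<int> &PredOutValues) {
  int First = PredOutValues.front();
  bool Disagree = false;
  for (int V : PredOutValues) {
    if (V == First)
      continue;
    if (V == phiFor(ThisBlock))
      continue; // Our own PHI coming back around the loop: ignore.
                // (The real pass also checks this edge is a backedge.)
    Disagree = true;
  }
  return Disagree ? phiFor(ThisBlock) : First;
}

int main() {
  // A non-backedge predecessor provides value 42; the backedge predecessor's
  // live-out is block 5's own PHI, so the PHI is redundant.
  std::printf("%d\n", join(5, {42, phiFor(5)})); // 42
  // Two predecessors with genuinely different values: a PHI is required.
  std::printf("%d\n", join(5, {42, 43}));        // -6 (PHI for block 5)
  return 0;
}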
-void InstrRefBasedLDV::vlocDataflow(
- const LexicalScope *Scope, const DILocation *DILoc,
+void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
- // This method is much like mlocDataflow: but focuses on a single
+ // This method is much like buildMLocValueMap: but focuses on a single
// LexicalScope at a time. Pick out a set of blocks and variables that are
// to have their value assignments solved, then run our dataflow algorithm
// until a fixedpoint is reached.
@@ -3235,8 +2494,8 @@ void InstrRefBasedLDV::vlocDataflow(
continue;
if (!ArtificialBlocks.count(succ))
continue;
- DFS.push_back(std::make_pair(succ, succ->succ_begin()));
ToAdd.insert(succ);
+ DFS.push_back(std::make_pair(succ, succ->succ_begin()));
}
// Search all those blocks, depth first.
@@ -3252,8 +2511,8 @@ void InstrRefBasedLDV::vlocDataflow(
// If the current successor is artificial and unexplored, descend into
// it.
if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
- DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
ToAdd.insert(*CurSucc);
+ DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
continue;
}
@@ -3278,6 +2537,13 @@ void InstrRefBasedLDV::vlocDataflow(
if (BlocksToExplore.size() == 1)
return;
+ // Convert a const set to a non-const set. LexicalScopes
+ // getMachineBasicBlocks returns const MBB pointers, IDF wants mutable ones.
+ // (Neither of them mutates anything).
+ SmallPtrSet<MachineBasicBlock *, 8> MutBlocksToExplore;
+ for (const auto *MBB : BlocksToExplore)
+ MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
+
// Picks out the relevant blocks in RPO order and sorts them.
for (auto *MBB : BlocksToExplore)
BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
@@ -3286,9 +2552,18 @@ void InstrRefBasedLDV::vlocDataflow(
unsigned NumBlocks = BlockOrders.size();
// Allocate some vectors for storing the live ins and live outs. Large.
- SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
- LiveIns.resize(NumBlocks);
- LiveOuts.resize(NumBlocks);
+ SmallVector<DbgValue, 32> LiveIns, LiveOuts;
+ LiveIns.reserve(NumBlocks);
+ LiveOuts.reserve(NumBlocks);
+
+ // Initialize all values to start as NoVals. This signifies "it's live
+ // through, but we don't know what it is".
+ DbgValueProperties EmptyProperties(EmptyExpr, false);
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns.push_back(EmptyDbgValue);
+ LiveOuts.push_back(EmptyDbgValue);
+ }
// Produce by-MBB indexes of live-in/live-outs, to ease lookup within
// vlocJoin.
@@ -3300,108 +2575,164 @@ void InstrRefBasedLDV::vlocDataflow(
LiveInIdx[BlockOrders[I]] = &LiveIns[I];
}
- for (auto *MBB : BlockOrders) {
- Worklist.push(BBToOrder[MBB]);
- OnWorklist.insert(MBB);
- }
+ // Loop over each variable and place PHIs for it, then propagate values
+ // between blocks. This keeps the locality of working on one lexical scope at
+ // a time, but avoids re-processing variable values because some other
+ // variable has been assigned.
+ for (auto &Var : VarsWeCareAbout) {
+ // Re-initialize live-ins and live-outs, to clear the remains of previous
+ // variables live-ins / live-outs.
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns[I] = EmptyDbgValue;
+ LiveOuts[I] = EmptyDbgValue;
+ }
- // Iterate over all the blocks we selected, propagating variable values.
- bool FirstTrip = true;
- SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
- while (!Worklist.empty() || !Pending.empty()) {
- while (!Worklist.empty()) {
- auto *MBB = OrderToBB[Worklist.top()];
- CurBB = MBB->getNumber();
- Worklist.pop();
+ // Place PHIs for variable values, using the LLVM IDF calculator.
+ // Collect the set of blocks where variables are def'd.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
+ auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
+ if (TransferFunc.find(Var) != TransferFunc.end())
+ DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
+ }
- DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
- // Join values from predecessors. Updates LiveInIdx, and writes output
- // into JoinedInLocs.
- bool InLocsChanged, DowngradeOccurred;
- std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
- *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
- CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
- BlocksToExplore, JoinedInLocs);
+ // Request the set of PHIs we should insert for this variable.
+ BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
- bool FirstVisit = VLOCVisited.insert(MBB).second;
+ // Insert PHIs into the per-block live-in tables for this variable.
+ for (MachineBasicBlock *PHIMBB : PHIBlocks) {
+ unsigned BlockNo = PHIMBB->getNumber();
+ DbgValue *LiveIn = LiveInIdx[PHIMBB];
+ *LiveIn = DbgValue(BlockNo, EmptyProperties, DbgValue::VPHI);
+ }
- // Always explore transfer function if inlocs changed, or if we've not
- // visited this block before.
- InLocsChanged |= FirstVisit;
+ for (auto *MBB : BlockOrders) {
+ Worklist.push(BBToOrder[MBB]);
+ OnWorklist.insert(MBB);
+ }
- // If a downgrade occurred, book us in for re-examination on the next
- // iteration.
- if (DowngradeOccurred && OnPending.insert(MBB).second)
- Pending.push(BBToOrder[MBB]);
+ // Iterate over all the blocks we selected, propagating the variables value.
+ // This loop does two things:
+ // * Eliminates unnecessary VPHIs in vlocJoin,
+ // * Evaluates the block's transfer function (i.e. variable assignments) and
+ // stores the result to the blocks live-outs.
+ // Always evaluate the transfer function on the first iteration, and when
+ // the live-ins change thereafter.
+ bool FirstTrip = true;
+ while (!Worklist.empty() || !Pending.empty()) {
+ while (!Worklist.empty()) {
+ auto *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ auto LiveInsIt = LiveInIdx.find(MBB);
+ assert(LiveInsIt != LiveInIdx.end());
+ DbgValue *LiveIn = LiveInsIt->second;
+
+ // Join values from predecessors: this updates the live-in value in place.
+ bool InLocsChanged =
+ vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+
+ SmallVector<const MachineBasicBlock *, 8> Preds;
+ for (const auto *Pred : MBB->predecessors())
+ Preds.push_back(Pred);
+
+ // If this block's live-in value is a VPHI, try to pick a machine-value
+ // for it. This makes the machine-value available and propagated
+ // through all blocks by the time value propagation finishes. We can't
+ // do this any earlier as it needs to read the block live-outs.
+ if (LiveIn->Kind == DbgValue::VPHI && LiveIn->BlockNo == (int)CurBB) {
+ // There's a small possibility that on a preceding path, a VPHI is
+ // eliminated and transitions from VPHI-with-location to
+ // live-through-value. As a result, the selected location of any VPHI
+ // might change, so we need to re-compute it on each iteration.
+ Optional<ValueIDNum> ValueNum =
+ pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds);
+
+ if (ValueNum) {
+ InLocsChanged |= LiveIn->ID != *ValueNum;
+ LiveIn->ID = *ValueNum;
+ }
+ }
- if (!InLocsChanged)
- continue;
+ if (!InLocsChanged && !FirstTrip)
+ continue;
+
+ DbgValue *LiveOut = LiveOutIdx[MBB];
+ bool OLChanged = false;
- // Do transfer function.
- auto &VTracker = AllTheVLocs[MBB->getNumber()];
- for (auto &Transfer : VTracker.Vars) {
- // Is this var we're mangling in this scope?
- if (VarsWeCareAbout.count(Transfer.first)) {
+ // Do transfer function.
+ auto &VTracker = AllTheVLocs[MBB->getNumber()];
+ auto TransferIt = VTracker.Vars.find(Var);
+ if (TransferIt != VTracker.Vars.end()) {
// Erase on empty transfer (DBG_VALUE $noreg).
- if (Transfer.second.Kind == DbgValue::Undef) {
- JoinedInLocs.erase(Transfer.first);
+ if (TransferIt->second.Kind == DbgValue::Undef) {
+ DbgValue NewVal(MBB->getNumber(), EmptyProperties, DbgValue::NoVal);
+ if (*LiveOut != NewVal) {
+ *LiveOut = NewVal;
+ OLChanged = true;
+ }
} else {
// Insert new variable value; or overwrite.
- auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
- auto Result = JoinedInLocs.insert(NewValuePair);
- if (!Result.second)
- Result.first->second = Transfer.second;
+ if (*LiveOut != TransferIt->second) {
+ *LiveOut = TransferIt->second;
+ OLChanged = true;
+ }
+ }
+ } else {
+ // Just copy live-ins to live-outs, for anything not transferred.
+ if (*LiveOut != *LiveIn) {
+ *LiveOut = *LiveIn;
+ OLChanged = true;
}
}
- }
-
- // Did the live-out locations change?
- bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];
-
- // If they haven't changed, there's no need to explore further.
- if (!OLChanged)
- continue;
- // Commit to the live-out record.
- *LiveOutIdx[MBB] = JoinedInLocs;
-
- // We should visit all successors. Ensure we'll visit any non-backedge
- // successors during this dataflow iteration; book backedge successors
- // to be visited next time around.
- for (auto s : MBB->successors()) {
- // Ignore out of scope / not-to-be-explored successors.
- if (LiveInIdx.find(s) == LiveInIdx.end())
+ // If no live-out value changed, there's no need to explore further.
+ if (!OLChanged)
continue;
- if (BBToOrder[s] > BBToOrder[MBB]) {
- if (OnWorklist.insert(s).second)
- Worklist.push(BBToOrder[s]);
- } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
- Pending.push(BBToOrder[s]);
+ // We should visit all successors. Ensure we'll visit any non-backedge
+ // successors during this dataflow iteration; book backedge successors
+ // to be visited next time around.
+ for (auto s : MBB->successors()) {
+ // Ignore out of scope / not-to-be-explored successors.
+ if (LiveInIdx.find(s) == LiveInIdx.end())
+ continue;
+
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
+ Pending.push(BBToOrder[s]);
+ }
}
}
+ Worklist.swap(Pending);
+ std::swap(OnWorklist, OnPending);
+ OnPending.clear();
+ assert(Pending.empty());
+ FirstTrip = false;
}
- Worklist.swap(Pending);
- std::swap(OnWorklist, OnPending);
- OnPending.clear();
- assert(Pending.empty());
- FirstTrip = false;
- }
-
- // Dataflow done. Now what? Save live-ins. Ignore any that are still marked
- // as being variable-PHIs, because those did not have their machine-PHI
- // value confirmed. Such variable values are places that could have been
- // PHIs, but are not.
- for (auto *MBB : BlockOrders) {
- auto &VarMap = *LiveInIdx[MBB];
- for (auto &P : VarMap) {
- if (P.second.Kind == DbgValue::Proposed ||
- P.second.Kind == DbgValue::NoVal)
+
+ // Save live-ins to output vector. Ignore any that are still marked as being
+ // VPHIs with no location -- those are variables that we know the value of,
+ // but are not actually available in the register file.
+ for (auto *MBB : BlockOrders) {
+ DbgValue *BlockLiveIn = LiveInIdx[MBB];
+ if (BlockLiveIn->Kind == DbgValue::NoVal)
continue;
- Output[MBB->getNumber()].push_back(P);
+ if (BlockLiveIn->Kind == DbgValue::VPHI &&
+ BlockLiveIn->ID == ValueIDNum::EmptyValue)
+ continue;
+ if (BlockLiveIn->Kind == DbgValue::VPHI)
+ BlockLiveIn->Kind = DbgValue::Def;
+ Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
}
- }
+ } // Per-variable loop.
BlockOrders.clear();
BlocksToExplore.clear();
@@ -3485,6 +2816,10 @@ void InstrRefBasedLDV::emitLocations(
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
// Build some useful data structures.
+
+ LLVMContext &Context = MF.getFunction().getContext();
+ EmptyExpr = DIExpression::get(Context, {});
+
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
if (const DebugLoc &DL = MI.getDebugLoc())
return DL.getLine() != 0;
@@ -3524,7 +2859,10 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
- TargetPassConfig *TPC) {
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC,
+ unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
// No subprogram means this function contains no debuginfo.
if (!MF.getFunction().getSubprogram())
return false;
@@ -3532,7 +2870,9 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
this->TPC = TPC;
+ this->DomTree = DomTree;
TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
TFI->getCalleeSaves(MF, CalleeSavedRegs);
@@ -3569,6 +2909,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
unsigned NumLocs = MTracker->getNumLocs();
for (int i = 0; i < MaxNumBlocks; ++i) {
+ // These all auto-initialize to ValueIDNum::EmptyValue
MOutLocs[i] = new ValueIDNum[NumLocs];
MInLocs[i] = new ValueIDNum[NumLocs];
}
@@ -3577,7 +2918,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// storing the computed live-ins / live-outs into the array-of-arrays. We use
// both live-ins and live-outs for decision making in the variable value
// dataflow problem.
- mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+ buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
// Patch up debug phi numbers, turning unknown block-live-in values into
// either live-through machine values, or PHIs.
@@ -3626,6 +2967,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
// the order is unimportant, it just has to be stable.
+ unsigned VarAssignCount = 0;
for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
auto *MBB = OrderToBB[I];
auto *VTracker = &vlocs[MBB->getNumber()];
@@ -3643,24 +2985,42 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
ScopeToVars[Scope].insert(Var);
ScopeToBlocks[Scope].insert(VTracker->MBB);
ScopeToDILocation[Scope] = ScopeLoc;
+ ++VarAssignCount;
}
}
- // OK. Iterate over scopes: there might be something to be said for
- // ordering them by size/locality, but that's for the future. For each scope,
- // solve the variable value problem, producing a map of variables to values
- // in SavedLiveIns.
- for (auto &P : ScopeToVars) {
- vlocDataflow(P.first, ScopeToDILocation[P.first], P.second,
- ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
- vlocs);
- }
+ bool Changed = false;
+
+ // If we have an extremely large number of variable assignments and blocks,
+ // bail out at this point. We've burnt some time doing analysis already,
+ // however we should cut our losses.
+ if ((unsigned)MaxNumBlocks > InputBBLimit &&
+ VarAssignCount > InputDbgValLimit) {
+ LLVM_DEBUG(dbgs() << "Disabling InstrRefBasedLDV: " << MF.getName()
+ << " has " << MaxNumBlocks << " basic blocks and "
+ << VarAssignCount
+ << " variable assignments, exceeding limits.\n");
+ } else {
+ // Compute the extended ranges, iterating over scopes. There might be
+ // something to be said for ordering them by size/locality, but that's for
+ // the future. For each scope, solve the variable value problem, producing
+ // a map of variables to values in SavedLiveIns.
+ for (auto &P : ScopeToVars) {
+ buildVLocValueMap(ScopeToDILocation[P.first], P.second,
+ ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ vlocs);
+ }
+
+ // Using the computed value locations and variable values for each block,
+ // create the DBG_VALUE instructions representing the extended variable
+ // locations.
+ emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
- // Using the computed value locations and variable values for each block,
- // create the DBG_VALUE instructions representing the extended variable
- // locations.
- emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
+ // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
+ Changed = TTracker->Transfers.size() != 0;
+ }
+ // Common clean-up of memory.
for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
delete[] MOutLocs[Idx];
delete[] MInLocs[Idx];
@@ -3668,9 +3028,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
delete[] MOutLocs;
delete[] MInLocs;
- // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
- bool Changed = TTracker->Transfers.size() != 0;
-
delete MTracker;
delete TTracker;
MTracker = nullptr;
@@ -3883,10 +3240,8 @@ public:
/// vector.
static void FindPredecessorBlocks(LDVSSABlock *BB,
SmallVectorImpl<LDVSSABlock *> *Preds) {
- for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(),
- E = BB->BB.pred_end();
- PI != E; ++PI)
- Preds->push_back(BB->Updater.getSSALDVBlock(*PI));
+ for (MachineBasicBlock *Pred : BB->BB.predecessors())
+ Preds->push_back(BB->Updater.getSSALDVBlock(Pred));
}
/// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
new file mode 100644
index 000000000000..d96ef6d4f6e5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -0,0 +1,1051 @@
+//===- InstrRefBasedImpl.h - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+#include "LiveDebugValues.h"
+
+class TransferTracker;
+
+// Forward declaration of the unit test class, so that we can peer into the
+// LDV object.
+class InstrRefLDVTest;
+
+namespace LiveDebugValues {
+
+class MLocTracker;
+
+using namespace llvm;
+
+/// Handle-class for a particular "location". This value-type uniquely
+/// symbolises a register or stack location, allowing manipulation of locations
+/// without concern for where that location is. Practically, this allows us to
+/// treat the state of the machine at a particular point as an array of values,
+/// rather than a map of values.
+class LocIdx {
+ unsigned Location;
+
+ // Default constructor is private, initializing to an illegal location number.
+ // Use only for "not an entry" elements in IndexedMaps.
+ LocIdx() : Location(UINT_MAX) {}
+
+public:
+#define NUM_LOC_BITS 24
+ LocIdx(unsigned L) : Location(L) {
+ assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+ }
+
+ static LocIdx MakeIllegalLoc() { return LocIdx(); }
+ static LocIdx MakeTombstoneLoc() {
+ LocIdx L = LocIdx();
+ --L.Location;
+ return L;
+ }
+
+ bool isIllegal() const { return Location == UINT_MAX; }
+
+ uint64_t asU64() const { return Location; }
+
+ bool operator==(unsigned L) const { return Location == L; }
+
+ bool operator==(const LocIdx &L) const { return Location == L.Location; }
+
+ bool operator!=(unsigned L) const { return !(*this == L); }
+
+ bool operator!=(const LocIdx &L) const { return !(*this == L); }
+
+ bool operator<(const LocIdx &Other) const {
+ return Location < Other.Location;
+ }
+};
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return std::make_pair(SpillBase, SpillOffset) ==
+ std::make_pair(Other.SpillBase, Other.SpillOffset);
+ }
+ bool operator<(const SpillLoc &Other) const {
+ return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+ SpillOffset.getScalable()) <
+ std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+ Other.SpillOffset.getScalable());
+ }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replacable with something less
+/// bit-constrained. Each value identifies the instruction and machine location
+/// where the value is defined, although there may be no corresponding machine
+/// operand for it (ex: regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
+class ValueIDNum {
+ union {
+ struct {
+ uint64_t BlockNo : 20; /// The block where the def happens.
+ uint64_t InstNo : 20; /// The Instruction where the def happens.
+ /// One based, is distance from start of block.
+ uint64_t LocNo
+ : NUM_LOC_BITS; /// The machine location where the def happens.
+ } s;
+ uint64_t Value;
+ } u;
+
+ static_assert(sizeof(u) == 8, "Badly packed ValueIDNum?");
+
+public:
+ // Default-initialize to EmptyValue. This is necessary to make IndexedMaps
+ // of values work.
+ ValueIDNum() { u.Value = EmptyValue.asU64(); }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) {
+ u.s = {Block, Inst, Loc};
+ }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) {
+ u.s = {Block, Inst, Loc.asU64()};
+ }
+
+ uint64_t getBlock() const { return u.s.BlockNo; }
+ uint64_t getInst() const { return u.s.InstNo; }
+ uint64_t getLoc() const { return u.s.LocNo; }
+ bool isPHI() const { return u.s.InstNo == 0; }
+
+ uint64_t asU64() const { return u.Value; }
+
+ static ValueIDNum fromU64(uint64_t v) {
+ ValueIDNum Val;
+ Val.u.Value = v;
+ return Val;
+ }
+
+ bool operator<(const ValueIDNum &Other) const {
+ return asU64() < Other.asU64();
+ }
+
+ bool operator==(const ValueIDNum &Other) const {
+ return u.Value == Other.u.Value;
+ }
+
+ bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+ std::string asString(const std::string &mlocname) const {
+ return Twine("Value{bb: ")
+ .concat(Twine(u.s.BlockNo)
+ .concat(Twine(", inst: ")
+ .concat((u.s.InstNo ? Twine(u.s.InstNo)
+ : Twine("live-in"))
+ .concat(Twine(", loc: ").concat(
+ Twine(mlocname)))
+ .concat(Twine("}")))))
+ .str();
+ }
+
+ static ValueIDNum EmptyValue;
+ static ValueIDNum TombstoneValue;
+};
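A brief usage sketch of ValueIDNum, assuming this header plus the static member definitions in InstrRefBasedImpl.cpp are available; the block, instruction and location numbers are invented. It shows the PHI convention (instruction number zero) and that the whole triple round-trips through a single uint64_t:

#include <cassert>

// Hypothetical helper, not part of the pass: exercises the ValueIDNum API.
void valueIDNumExample() {
  LocIdx Loc(5);                                   // Some tracked location.
  ValueIDNum LiveIn(/*Block=*/3, /*Inst=*/0, Loc); // Live-in PHI of bb 3.
  ValueIDNum Def(/*Block=*/3, /*Inst=*/7, Loc);    // Defined by inst 7 of bb 3.

  assert(LiveIn.isPHI() && !Def.isPHI()); // InstNo == 0 means PHI / live-in.
  assert(LiveIn != Def);

  // The (block, inst, loc) triple round-trips through one uint64_t.
  assert(ValueIDNum::fromU64(LiveIn.asU64()) == LiveIn);
}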
+
+/// Thin wrapper around an integer -- designed to give more type safety to
+/// spill location numbers.
+class SpillLocationNo {
+public:
+ explicit SpillLocationNo(unsigned SpillNo) : SpillNo(SpillNo) {}
+ unsigned SpillNo;
+ unsigned id() const { return SpillNo; }
+
+ bool operator<(const SpillLocationNo &Other) const {
+ return SpillNo < Other.SpillNo;
+ }
+
+ bool operator==(const SpillLocationNo &Other) const {
+ return SpillNo == Other.SpillNo;
+ }
+ bool operator!=(const SpillLocationNo &Other) const {
+ return !(*this == Other);
+ }
+};
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a Boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+ : DIExpr(DIExpr), Indirect(Indirect) {}
+
+ /// Extract properties from an existing DBG_VALUE instruction.
+ DbgValueProperties(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ DIExpr = MI.getDebugExpression();
+ Indirect = MI.getOperand(1).isImm();
+ }
+
+ bool operator==(const DbgValueProperties &Other) const {
+ return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+ }
+
+ bool operator!=(const DbgValueProperties &Other) const {
+ return !(*this == Other);
+ }
+
+ const DIExpression *DIExpr;
+ bool Indirect;
+};
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+ /// If Kind is Def, the value number that this value is based on. VPHIs set
+ /// this field to EmptyValue if there is no machine-value for this VPHI, or
+ /// the corresponding machine-value if there is one.
+ ValueIDNum ID;
+ /// If Kind is Const, the MachineOperand defining this value.
+ Optional<MachineOperand> MO;
+ /// For a NoVal or VPHI DbgValue, which block it was generated in.
+ int BlockNo;
+
+ /// Qualifiers for the ValueIDNum above.
+ DbgValueProperties Properties;
+
+ typedef enum {
+ Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by an inst, or is a PHI value.
+ Const, // A constant value contained in the MachineOperand field.
+ VPHI, // Incoming values to BlockNo differ, those values must be joined by
+ // a PHI in this block.
+ NoVal, // Empty DbgValue indicating an unknown value. Used as initializer,
+ // before dominating blocks' values are propagated in.
+ } KindT;
+ /// Discriminator for whether this is a constant or an in-program value.
+ KindT Kind;
+
+ DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
+ : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) {
+ assert(Kind == Def);
+ }
+
+ DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo),
+ Properties(Prop), Kind(Kind) {
+ assert(Kind == NoVal || Kind == VPHI);
+ }
+
+ DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop),
+ Kind(Kind) {
+ assert(Kind == Const);
+ }
+
+ DbgValue(const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop),
+ Kind(Kind) {
+ assert(Kind == Undef &&
+ "Empty DbgValue constructor must pass in Undef kind");
+ }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
+
+ bool operator==(const DbgValue &Other) const {
+ if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
+ return false;
+ else if (Kind == Def && ID != Other.ID)
+ return false;
+ else if (Kind == NoVal && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == Const)
+ return MO->isIdenticalTo(*Other.MO);
+ else if (Kind == VPHI && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == VPHI && ID != Other.ID)
+ return false;
+
+ return true;
+ }
+
+ bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+};
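A hypothetical sketch of how DbgValues combine with the Kind discriminator, assuming an LLVMContext-created DIExpression is passed in from elsewhere. A VPHI starts with ID set to EmptyValue; when pickVPHILoc later finds a machine PHI for it, the ID field is filled in and the value no longer compares equal to the unresolved VPHI:

#include <cassert>

// Hypothetical helper, not part of the pass: EmptyExpr would be a
// DIExpression created elsewhere (e.g. an empty expression for the context).
void dbgValueExample(const DIExpression *EmptyExpr) {
  DbgValueProperties Props(EmptyExpr, /*Indirect=*/false);

  DbgValue Unresolved(/*BlockNo=*/4, Props, DbgValue::VPHI);
  assert(Unresolved.ID == ValueIDNum::EmptyValue); // No machine value yet.

  DbgValue Resolved = Unresolved;
  Resolved.ID = ValueIDNum(/*Block=*/4, /*Inst=*/0, LocIdx(2));
  assert(Unresolved != Resolved); // Same kind and block, different machine value.
}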
+
+class LocIdxToIndexFunctor {
+public:
+ using argument_type = LocIdx;
+ unsigned operator()(const LocIdx &L) const { return L.asU64(); }
+};
+
+/// Tracker for what values are in machine locations. Listens to the Things
+/// being Done by various instructions, and maintains a table of what machine
+/// locations have what values (as defined by a ValueIDNum).
+///
+/// There are potentially a much larger number of machine locations on the
+/// target machine than the actual working-set size of the function. On x86 for
+/// example, we're extremely unlikely to want to track values through control
+/// or debug registers. To avoid doing so, MLocTracker has several layers of
+/// indirection going on, described below, to avoid unnecessarily tracking
+/// any location.
+///
+/// Here's a sort of diagram of the indexes, read from the bottom up:
+///
+///           Size on stack   Offset on stack
+///                 \              /
+///          Stack Idx (Where in slot is this?)
+///                         /
+///                        /
+/// Slot Num (%stack.0)   /
+/// FrameIdx => SpillNum /
+///              \      /
+///           SpillID (int)   Register number (int)
+///                      \       /
+///                      LocationID => LocIdx
+///                                      |
+///                                      LocIdx => ValueIDNum
+///
+/// The aim here is that the LocIdx => ValueIDNum vector is just an array of
+/// values in numbered locations, so that later analyses can ignore whether the
+/// location is a register or otherwise. To map a register / spill location to
+/// a LocIdx, you have to use the (sparse) LocationID => LocIdx map. And to
+/// build a LocationID for a stack slot, you need to combine identifiers for
+/// which stack slot it is and where within that slot is being described.
+///
+/// Register mask operands cause trouble by technically defining every register;
+/// various hacks are used to avoid tracking registers that are never read and
+/// only written by regmasks.
+class MLocTracker {
+public:
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const TargetLowering &TLI;
+
+ /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+ /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+ /// packed, entries only exist for locations that are being tracked.
+ LocToValueType LocIdxToIDNum;
+
+ /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+ /// LocIdx key / number for that location. There are always at least as many
+ /// as the number of registers on the target -- if the value in the register
+ /// is not being tracked, then the LocIdx value will be zero. New entries are
+ /// appended if a new spill slot begins being tracked.
+ /// This, and the corresponding reverse map, persist for the analysis of the
+ /// whole function, and are necessary for decoding various vectors of
+ /// values.
+ std::vector<LocIdx> LocIDToLocIdx;
+
+ /// Inverse map of LocIDToLocIdx.
+ IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+ /// When applying register mask clobbers, we choose not to believe the machine
+ /// model and don't clobber SP. Do the same for SP aliases, and for efficiency,
+ /// keep a set of them here.
+ SmallSet<Register, 8> SPAliases;
+
+ /// Unique-ification of spill locations. Used to number them -- their LocID
+ /// number is the index in SpillLocs minus one plus NumRegs.
+ UniqueVector<SpillLoc> SpillLocs;
+
+ // If we discover a new machine location, assign it an mphi with this
+ // block number.
+ unsigned CurBB;
+
+ /// Cached local copy of the number of registers the target has.
+ unsigned NumRegs;
+
+ /// Number of slot indexes the target has -- distinct segments of a stack
+ /// slot that can take on the value of a subregister, when a super-register
+ /// is written to the stack.
+ unsigned NumSlotIdxes;
+
+ /// Collection of register mask operands that have been observed. Second part
+ /// of pair indicates the instruction that they happened in. Used to
+ /// reconstruct where defs happened if we start tracking a location later
+ /// on.
+ SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+ /// Pair for describing a position within a stack slot -- first the size in
+ /// bits, then the offset.
+ typedef std::pair<unsigned short, unsigned short> StackSlotPos;
+
+ /// Map from a size/offset pair describing a position in a stack slot, to a
+ /// numeric identifier for that position. Allows easier identification of
+ /// individual positions.
+ DenseMap<StackSlotPos, unsigned> StackSlotIdxes;
+
+ /// Inverse of StackSlotIdxes.
+ DenseMap<unsigned, StackSlotPos> StackIdxesToPos;
+
+ /// Iterator for locations and the values they contain. Dereferencing
+ /// produces a struct/pair containing the LocIdx key for this location,
+ /// and a reference to the value currently stored. Simplifies the process
+ /// of seeking a particular location.
+ class MLocIterator {
+ LocToValueType &ValueMap;
+ LocIdx Idx;
+
+ public:
+ class value_type {
+ public:
+ value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) {}
+ const LocIdx Idx; /// Read-only index of this location.
+ ValueIDNum &Value; /// Reference to the stored value at this location.
+ };
+
+ MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+ : ValueMap(ValueMap), Idx(Idx) {}
+
+ bool operator==(const MLocIterator &Other) const {
+ assert(&ValueMap == &Other.ValueMap);
+ return Idx == Other.Idx;
+ }
+
+ bool operator!=(const MLocIterator &Other) const {
+ return !(*this == Other);
+ }
+
+ void operator++() { Idx = LocIdx(Idx.asU64() + 1); }
+
+ value_type operator*() { return value_type(Idx, ValueMap[LocIdx(Idx)]); }
+ };
+
+ MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI, const TargetLowering &TLI);
+
+ /// Produce location ID number for a Register. Provides some small amount of
+ /// type safety.
+ /// \param Reg The register we're looking up.
+ unsigned getLocID(Register Reg) { return Reg.id(); }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+ /// \param SpillSubReg Subregister within the spill we're addressing.
+ unsigned getLocID(SpillLocationNo Spill, unsigned SpillSubReg) {
+ unsigned short Size = TRI.getSubRegIdxSize(SpillSubReg);
+ unsigned short Offs = TRI.getSubRegIdxOffset(SpillSubReg);
+ return getLocID(Spill, {Size, Offs});
+ }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+ /// \param Idx Size/offset within the spill slot to be addressed.
+ unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+ SlotNo += StackSlotIdxes[Idx];
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Given a spill number, and a slot within the spill, calculate the ID number
+ /// for that location.
+ unsigned getSpillIDWithIdx(SpillLocationNo Spill, unsigned Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ SlotNo += Idx;
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Return the spill number that a location ID corresponds to.
+ SpillLocationNo locIDToSpill(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ // Truncate away the index part, leaving only the spill number.
+ ID /= NumSlotIdxes;
+ return SpillLocationNo(ID + 1); // The UniqueVector is one-based.
+ }
+
+ /// Returns the spill-slot size/offset that a location ID corresponds to.
+ StackSlotPos locIDToSpillIdx(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ unsigned Idx = ID % NumSlotIdxes;
+ return StackIdxesToPos.find(Idx)->second;
+ }
+
+ unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+ /// Reset all locations to contain a PHI value at the designated block. Used
+ /// sometimes for actual PHI values, other times to indicate the block entry
+ /// value (before any more information is known).
+ void setMPhis(unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ for (auto Location : locations())
+ Location.Value = {CurBB, 0, Location.Idx};
+ }
+
+ /// Load values for each location from array of ValueIDNums. Take current
+ /// bbnum just in case we read a value from a hitherto untouched register.
+ void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ // Iterate over all tracked locations, and load each locations live-in
+ // value into our local index.
+ for (auto Location : locations())
+ Location.Value = Locs[Location.Idx.asU64()];
+ }
+
+ /// Wipe any unnecessary location records after traversing a block.
+ void reset(void) {
+ // We could reset all the location values too; however either loadFromArray
+ // or setMPhis should be called before this object is re-used. Just
+ // clear Masks, they're definitely not needed.
+ Masks.clear();
+ }
+
+ /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+ /// the information in this pass uninterpretable.
+ void clear(void) {
+ reset();
+ LocIDToLocIdx.clear();
+ LocIdxToLocID.clear();
+ LocIdxToIDNum.clear();
+ // SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from
+ // 0
+ SpillLocs = decltype(SpillLocs)();
+ StackSlotIdxes.clear();
+ StackIdxesToPos.clear();
+
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ }
+
+ /// Set a location to a certain value.
+ void setMLoc(LocIdx L, ValueIDNum Num) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ LocIdxToIDNum[L] = Num;
+ }
+
+ /// Read the value of a particular location
+ ValueIDNum readMLoc(LocIdx L) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ return LocIdxToIDNum[L];
+ }
+
+ /// Create a LocIdx for an untracked register ID. Initialize it to either an
+ /// mphi value representing a live-in, or a recent register mask clobber.
+ LocIdx trackRegister(unsigned ID);
+
+ LocIdx lookupOrTrackRegister(unsigned ID) {
+ LocIdx &Index = LocIDToLocIdx[ID];
+ if (Index.isIllegal())
+ Index = trackRegister(ID);
+ return Index;
+ }
+
+ /// Is register R currently tracked by MLocTracker?
+ bool isRegisterTracked(Register R) {
+ LocIdx &Index = LocIDToLocIdx[R];
+ return !Index.isIllegal();
+ }
+
+ /// Record a definition of the specified register at the given block / inst.
+ /// This doesn't take a ValueIDNum, because the definition and its location
+ /// are synonymous.
+ void defReg(Register R, unsigned BB, unsigned Inst) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ ValueIDNum ValueID = {BB, Inst, Idx};
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Set a register to a value number. To be used if the value number is
+ /// known in advance.
+ void setReg(Register R, ValueIDNum ValueID) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ ValueIDNum readReg(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Reset a register value to zero / empty. Needed to replicate the
+ /// VarLoc implementation where a copy to/from a register effectively
+ /// clears the contents of the source register. (Values can only have one
+ /// machine location in VarLocBasedImpl).
+ void wipeRegister(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = LocIDToLocIdx[ID];
+ LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+ }
+
+ /// Determine the LocIdx of an existing register.
+ LocIdx getRegMLoc(Register R) {
+ unsigned ID = getLocID(R);
+ assert(ID < LocIDToLocIdx.size());
+ assert(LocIDToLocIdx[ID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[ID];
+ }
+
+ /// Record a RegMask operand being executed. Defs any register we currently
+ /// track, stores a pointer to the mask in case we have to account for it
+ /// later.
+ void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
+
+ /// Find the SpillLocationNo for \p L, creating a new one if it's not tracked.
+ SpillLocationNo getOrTrackSpillLoc(SpillLoc L);
+
+ /// Get the LocIdx of a spill ID.
+ LocIdx getSpillMLoc(unsigned SpillID) {
+ assert(LocIDToLocIdx[SpillID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[SpillID];
+ }
+
+ /// Return true if Idx is a spill machine location.
+ bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+
+ MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
+
+ MLocIterator end() {
+ return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+ }
+
+ /// Return a range over all locations currently tracked.
+ iterator_range<MLocIterator> locations() {
+ return llvm::make_range(begin(), end());
+ }
+
+ std::string LocIdxToName(LocIdx Idx) const;
+
+ std::string IDAsString(const ValueIDNum &Num) const;
+
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump();
+
+ LLVM_DUMP_METHOD void dump_mloc_map();
+#endif
+
+ /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+ /// information in \p Properties, for variable \p Var. Don't insert it anywhere,
+ /// just return the builder for it.
+ MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+ const DbgValueProperties &Properties);
+};
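
Editorial aside, not part of the patch: the LocID numbering that getLocID, getSpillIDWithIdx and locIDToSpill implement is easier to follow with concrete numbers. Below is a minimal standalone sketch of the same arithmetic using plain integers; the register count and slot-index count are invented for illustration and do not correspond to any target.

#include <cassert>

int main() {
  // Illustrative values only; MLocTracker derives these from the target.
  const unsigned NumRegs = 256;
  const unsigned NumSlotIdxes = 4;

  // Register location IDs are just the register number, in [0, NumRegs).
  unsigned RegID = 42;
  assert(RegID < NumRegs);

  // Spill locations are appended after the registers, with NumSlotIdxes IDs
  // per spill slot (one per size/offset position).
  unsigned SpillNo = 3; // one-based, as UniqueVector numbering is
  unsigned SlotIdx = 2; // which size/offset position within the slot
  unsigned SpillID = (SpillNo - 1) * NumSlotIdxes + SlotIdx + NumRegs;

  // Inverting the mapping, as locIDToSpill / locIDToSpillIdx do.
  assert(SpillID >= NumRegs);
  unsigned Stripped = SpillID - NumRegs;
  assert(Stripped / NumSlotIdxes + 1 == SpillNo);
  assert(Stripped % NumSlotIdxes == SlotIdx);
  return 0;
}
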
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+ /// Map DebugVariable to the latest Value it's defined to have.
+ /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+ /// the order in this container.
+ /// We only retain the last DbgValue in each block for each variable, to
+ /// determine the block's live-out variable value. The Vars container forms the
+ /// transfer function for this block, as part of the dataflow analysis. The
+ /// movement of values between locations inside of a block is handled at a
+ /// much later stage, in the TransferTracker class.
+ MapVector<DebugVariable, DbgValue> Vars;
+ DenseMap<DebugVariable, const DILocation *> Scopes;
+ MachineBasicBlock *MBB = nullptr;
+
+public:
+ VLocTracker() {}
+
+ void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<ValueIDNum> ID) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+ : DbgValue(Properties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+
+ void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+ // Only DBG_VALUEs can define constant-valued variables.
+ assert(MI.isDebugValue());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+ DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+};
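
A short aside on the container choice above (not part of the patch): MapVector gives last-definition-wins updates while preserving first-insertion order, which is what the defVar overloads rely on. A sketch of that pattern with plain ints standing in for DebugVariable and DbgValue, assuming the LLVM ADT headers are on the include path:

#include "llvm/ADT/MapVector.h"
#include <cassert>

int main() {
  llvm::MapVector<int, int> Vars; // key: variable, value: latest definition

  auto DefVar = [&Vars](int Var, int Value) {
    // Attempt insertion; overwrite if it's already mapped, as defVar does.
    auto Result = Vars.insert(std::make_pair(Var, Value));
    if (!Result.second)
      Result.first->second = Value;
  };

  DefVar(/*Var=*/1, /*Value=*/10);
  DefVar(/*Var=*/2, /*Value=*/20);
  DefVar(/*Var=*/1, /*Value=*/30); // later re-definition of variable 1

  // Only the last value per variable survives, and iteration order is the
  // order in which variables were first seen: 1, then 2.
  assert(Vars.size() == 2);
  assert(Vars.begin()->first == 1 && Vars.begin()->second == 30);
  return 0;
}
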
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// The instruction-referencing implementation of LiveDebugValues: solves the
+/// machine value-location and variable-value dataflow problems described in
+/// the file comment, then emits DBG_VALUEs for the computed variable values.
+class InstrRefBasedLDV : public LDVImpl {
+public:
+ friend class ::InstrRefLDVTest;
+
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+ /// Machine location/value transfer function, a mapping of which locations
+ /// are assigned which new values.
+ using MLocTransferMap = SmallDenseMap<LocIdx, ValueIDNum>;
+
+ /// Live in/out structure for the variable values: a per-block map of
+ /// variables to their values.
+ using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
+
+ using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+ /// Type for a live-in value: the predecessor block, and its value.
+ using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+ /// Vector (per block) of a collection (inner smallvector) of live-ins.
+ /// Used as the result type for the variable value dataflow problem.
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+private:
+ MachineDominatorTree *DomTree;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ const MachineFrameInfo *MFI;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ TargetPassConfig *TPC;
+
+ // An empty DIExpression. Used for default / placeholder DbgValueProperties
+ // objects, as we can't have null expressions.
+ const DIExpression *EmptyExpr;
+
+ /// Object to track machine locations as we step through a block. Could
+ /// probably be a field rather than a pointer, as it's always used.
+ MLocTracker *MTracker = nullptr;
+
+ /// Number of the current block LiveDebugValues is stepping through.
+ unsigned CurBB;
+
+ /// Number of the current instruction LiveDebugValues is evaluating.
+ unsigned CurInst;
+
+ /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+ /// steps through a block. Reads the values at each location from the
+ /// MLocTracker object.
+ VLocTracker *VTracker = nullptr;
+
+ /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+ /// between locations during stepping, creates new DBG_VALUEs when values move
+ /// location.
+ TransferTracker *TTracker = nullptr;
+
+ /// Blocks which are artificial, i.e. blocks which exclusively contain
+ /// instructions without DebugLocs, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+ // Mapping of blocks to and from their RPOT order.
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
+ DenseMap<unsigned, unsigned> BBNumToRPO;
+
+ /// Pair of MachineInstr, and its 1-based offset into the containing block.
+ using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+ /// Map from debug instruction number to the MachineInstr labelled with that
+ /// number, and its location within the function. Used to transform
+ /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+ std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+ /// Record of where we observed a DBG_PHI instruction.
+ class DebugPHIRecord {
+ public:
+ uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
+ MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+ ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
+ LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+
+ operator unsigned() const { return InstrNum; }
+ };
+
+ /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+ /// DBG_PHI read and where. Populated and edited during the machine value
+ /// location problem -- we use LLVM's SSA Updater to fix changes made by
+ /// optimizations that destroy PHI instructions.
+ SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
+ // Map of overlapping variable fragments.
+ OverlapMap OverlapFragments;
+ VarToFragments SeenFragments;
+
+ /// Tests whether this instruction is a spill to a stack slot.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if \p MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
+ /// If a given instruction is identified as a restore, return the spill slot
+ /// it restores from and set \p Reg to the restored register.
+ Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg);
+
+ /// Given a spill instruction, extract the spill slot information, ensure it's
+ /// tracked, and return the spill number.
+ SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+ /// Observe a single instruction while stepping through a block.
+ void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+ ValueIDNum **MLiveIns = nullptr);
+
+ /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugValue(const MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns);
+
+ /// Stores value-information about where this PHI occurred, and what
+ /// instruction number is associated with it.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugPHI(MachineInstr &MI);
+
+ /// Examines whether \p MI is a copy instruction, and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferRegisterCopy(MachineInstr &MI);
+
+ /// Examines whether \p MI is a stack spill or restore instruction, and
+ /// notifies trackers. \returns true if MI was recognized and processed.
+ bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+ /// Examines \p MI for any registers that it defines, and notifies trackers.
+ void transferRegisterDef(MachineInstr &MI);
+
+ /// Copy one location to the other, accounting for movement of subregisters
+ /// too.
+ void performCopy(Register Src, Register Dst);
+
+ void accumulateFragmentMap(MachineInstr &MI);
+
+ /// Determine the machine value number referred to by (potentially several)
+ /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+ /// DBG_PHIs, shifting the position where values in registers merge, and
+ /// forming another mini-ssa problem to solve.
+ /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+ /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+ /// \returns The machine value number at position Here, or None.
+ Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+ ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns, MachineInstr &Here,
+ uint64_t InstrNum);
+
+ /// Step through the function, recording register definitions and movements
+ /// in an MLocTracker. Convert the observations into a per-block transfer
+ /// function in \p MLocTransfer, suitable for using with the machine value
+ /// location dataflow problem.
+ void
+ produceMLocTransferFunction(MachineFunction &MF,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks);
+
+ /// Solve the machine value location dataflow problem. Takes as input the
+ /// transfer functions in \p MLocTransfer. Writes the output live-in and
+ /// live-out arrays to the (initialized to zero) multidimensional arrays in
+ /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+ /// number, the inner by LocIdx.
+ void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
+ ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Examine the stack indexes (i.e. offsets within the stack) to find the
+ /// basic units of interference -- like reg units, but for the stack.
+ void findStackIndexInterference(SmallVectorImpl<unsigned> &Slots);
+
+ /// Install PHI values into the live-in array for each block, according to
+ /// the IDF of each register.
+ void placeMLocPHIs(MachineFunction &MF,
+ SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Calculate the iterated-dominance-frontier for a set of defs, using the
+ /// existing LLVM facilities for this. Works for a single "value" or
+ /// machine/variable location.
+ /// \p AllBlocks Set of blocks where we might consume the value.
+ /// \p DefBlocks Set of blocks where the value/location is defined.
+ /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+ void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
+
+ /// Perform a control flow join (lattice value meet) of the values in machine
+ /// locations at \p MBB. Follows the algorithm described in the file-comment,
+ /// reading live-outs of predecessors from \p OutLocs, the current live ins
+ /// from \p InLocs, and assigning the newly computed live ins back into
+ /// \p InLocs. \returns true if the live-in value set changed and the block
+ /// needs to be revisited.
+ bool mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+ /// Solve the variable value dataflow problem, for a single lexical scope.
+ /// Uses the algorithm from the file comment to resolve control flow joins
+ /// using PHI placement and value propagation. Reads the locations of machine
+ /// values from the \p MInLocs and \p MOutLocs arrays (see buildMLocValueMap)
+ /// and reads the variable values transfer function from \p AllTheVlocs.
+ /// Live-in and Live-out variable values are stored locally, with the live-ins
+ /// permanently stored to \p Output once a fixed point is reached.
+ /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+ /// that we should be tracking.
+ /// \p AssignBlocks contains the set of blocks that aren't in \p DILoc's
+ /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
+ /// locations through.
+ void buildVLocValueMap(const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+ /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
+ /// live-in values coming from the predecessors' live-outs, and replaces any
+ /// PHIs already present in this block's live-ins with a live-through value if the
+ /// PHI isn't needed.
+ /// \p LiveIn Old live-in value, overwritten with new one if live-in changes.
+ /// \returns true if any live-ins change value, either from value propagation
+ /// or PHI elimination.
+ bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DbgValue &LiveIn);
+
+ /// For the given block and live-outs feeding into it, try to find a
+ /// machine location where all the variable values join together.
+ /// \returns Value ID of a machine PHI if an appropriate one is available.
+ Optional<ValueIDNum>
+ pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+ /// Given the solutions to the two dataflow problems, machine value locations
+ /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+ /// TransferTracker class over the function to produce live-in and transfer
+ /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+ /// order given by AllVarsNumbering -- this could be any stable order, but
+ /// right now "order of appearence in function, when explored in RPO", so
+ /// that we can compare explictly against VarLocBasedImpl.
+ void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC);
+
+ /// Boilerplate computation of some initial sets, artificial blocks and
+ /// RPOT block ordering.
+ void initialSetup(MachineFunction &MF);
+
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
+
+public:
+ /// Default construct and initialize the pass.
+ InstrRefBasedLDV();
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+ bool isCalleeSaved(LocIdx L) const;
+
+ bool hasFoldedStackStore(const MachineInstr &MI) {
+ // Instruction must have a memory operand that's a stack slot, and isn't
+ // aliased, meaning it's a spill from regalloc instead of a variable.
+ // If it's aliased, we can't guarantee its value.
+ if (!MI.hasOneMemOperand())
+ return false;
+ auto *MemOperand = *MI.memoperands_begin();
+ return MemOperand->isStore() &&
+ MemOperand->getPseudoValue() &&
+ MemOperand->getPseudoValue()->kind() == PseudoSourceValue::FixedStack
+ && !MemOperand->getPseudoValue()->isAliased(MFI);
+ }
+
+ Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
+};
+
+} // namespace LiveDebugValues
+
+namespace llvm {
+using namespace LiveDebugValues;
+
+template <> struct DenseMapInfo<LocIdx> {
+ static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); }
+ static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); }
+
+ static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); }
+
+ static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; }
+};
+
+template <> struct DenseMapInfo<ValueIDNum> {
+ static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; }
+ static inline ValueIDNum getTombstoneKey() {
+ return ValueIDNum::TombstoneValue;
+ }
+
+ static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); }
+
+ static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
+ return A == B;
+ }
+};
+
+} // end namespace llvm
+
+#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */
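
Editorial aside, not part of the patch: the DenseMapInfo specializations above are what allow LocIdx and ValueIDNum to be used directly as DenseMap keys, by supplying empty/tombstone sentinels, a hash, and equality. A generic sketch of the pattern with an invented key type:

#include "llvm/ADT/DenseMap.h"
#include <cassert>

// "Slot" is a made-up key type for illustration only.
struct Slot {
  unsigned Id;
  bool operator==(const Slot &Other) const { return Id == Other.Id; }
};

namespace llvm {
template <> struct DenseMapInfo<Slot> {
  static inline Slot getEmptyKey() { return {~0U}; }
  static inline Slot getTombstoneKey() { return {~0U - 1}; }
  static unsigned getHashValue(const Slot &S) { return S.Id; }
  static bool isEqual(const Slot &A, const Slot &B) { return A == B; }
};
} // namespace llvm

int main() {
  llvm::DenseMap<Slot, int> Values;
  Values[Slot{7}] = 42;
  assert(Values.lookup(Slot{7}) == 42);
  return 0;
}
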
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 38e803d1abb5..691977dc34e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -40,6 +40,19 @@ static cl::opt<bool>
"normal DBG_VALUE inputs"),
cl::init(false));
+// Options to prevent pathological compile-time behavior. If InputBBLimit and
+// InputDbgValueLimit are both exceeded, range extension is disabled.
+static cl::opt<unsigned> InputBBLimit(
+ "livedebugvalues-input-bb-limit",
+ cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
+ cl::init(10000), cl::Hidden);
+static cl::opt<unsigned> InputDbgValueLimit(
+ "livedebugvalues-input-dbg-value-limit",
+ cl::desc(
+ "Maximum input DBG_VALUE insts supported by debug range extension"),
+ cl::init(50000), cl::Hidden);
+
+namespace {
/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
/// base class.
@@ -48,10 +61,7 @@ public:
static char ID;
LiveDebugValues();
- ~LiveDebugValues() {
- if (TheImpl)
- delete TheImpl;
- }
+ ~LiveDebugValues() {}
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -67,9 +77,12 @@ public:
}
private:
- LDVImpl *TheImpl;
+ std::unique_ptr<LDVImpl> InstrRefImpl;
+ std::unique_ptr<LDVImpl> VarLocImpl;
TargetPassConfig *TPC;
+ MachineDominatorTree MDT;
};
+} // namespace
char LiveDebugValues::ID = 0;
@@ -81,27 +94,26 @@ INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,
/// Default construct and initialize the pass.
LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
- TheImpl = nullptr;
+ InstrRefImpl =
+ std::unique_ptr<LDVImpl>(llvm::makeInstrRefBasedLiveDebugValues());
+ VarLocImpl = std::unique_ptr<LDVImpl>(llvm::makeVarLocBasedLiveDebugValues());
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
- if (!TheImpl) {
- TPC = getAnalysisIfAvailable<TargetPassConfig>();
-
- bool InstrRefBased = false;
- if (TPC) {
- auto &TM = TPC->getTM<TargetMachine>();
- InstrRefBased = TM.Options.ValueTrackingVariableLocations;
- }
-
- // Allow the user to force selection of InstrRef LDV.
- InstrRefBased |= ForceInstrRefLDV;
-
- if (InstrRefBased)
- TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
- else
- TheImpl = llvm::makeVarLocBasedLiveDebugValues();
+ bool InstrRefBased = MF.useDebugInstrRef();
+ // Allow the user to force selection of InstrRef LDV.
+ InstrRefBased |= ForceInstrRefLDV;
+
+ TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ LDVImpl *TheImpl = &*VarLocImpl;
+
+ MachineDominatorTree *DomTree = nullptr;
+ if (InstrRefBased) {
+ DomTree = &MDT;
+ MDT.calculate(MF);
+ TheImpl = &*InstrRefImpl;
}
- return TheImpl->ExtendRanges(MF, TPC);
+ return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit,
+ InputDbgValueLimit);
}
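
A brief note on the two limits that moved into this file (not part of the patch): as the comment above them says, range extension is only abandoned when a function exceeds both the basic-block limit and the DBG_VALUE limit. A standalone sketch of that policy; the helper name and numbers below are invented for illustration:

#include <cassert>

// Hypothetical helper mirroring the "both limits exceeded" policy.
static bool shouldDisableRangeExtension(unsigned NumBlocks, unsigned NumDbgValues,
                                        unsigned BBLimit, unsigned DbgValLimit) {
  return NumBlocks > BBLimit && NumDbgValues > DbgValLimit;
}

int main() {
  // A huge function with few DBG_VALUEs is still processed...
  assert(!shouldDisableRangeExtension(20000, 100, 10000, 50000));
  // ...but exceeding both limits disables the analysis.
  assert(shouldDisableRangeExtension(20000, 60000, 10000, 50000));
  return 0;
}
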
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 9c910f180b9f..a5936c8a96f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -23,7 +24,9 @@ inline namespace SharedLiveDebugValues {
// implementation.
class LDVImpl {
public:
- virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0;
+ virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) = 0;
virtual ~LDVImpl() {}
};
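
For context (not part of the patch): after this interface change, every LDVImpl receives the dominator tree and both input limits from the wrapper pass. A hedged sketch of what a minimal additional implementation would have to provide; "NoOpLDV" is invented, and the namespace qualification is assumed from the surrounding code:

#include "LiveDebugValues.h"

namespace {
// Hypothetical no-op implementation satisfying the new ExtendRanges signature.
class NoOpLDV : public llvm::LDVImpl {
  bool ExtendRanges(llvm::MachineFunction &MF,
                    llvm::MachineDominatorTree *DomTree,
                    llvm::TargetPassConfig *TPC, unsigned InputBBLimit,
                    unsigned InputDbgValLimit) override {
    (void)MF; (void)DomTree; (void)TPC; (void)InputBBLimit; (void)InputDbgValLimit;
    // A real implementation would propagate variable locations here; this
    // stub just reports that it made no changes.
    return false;
  }
};
} // namespace
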
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 1e6d65c18953..a632d3d9ce76 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -155,6 +155,7 @@
#include <cassert>
#include <cstdint>
#include <functional>
+#include <map>
#include <queue>
#include <tuple>
#include <utility>
@@ -166,18 +167,6 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-// Options to prevent pathological compile-time behavior. If InputBBLimit and
-// InputDbgValueLimit are both exceeded, range extension is disabled.
-static cl::opt<unsigned> InputBBLimit(
- "livedebugvalues-input-bb-limit",
- cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
- cl::init(10000), cl::Hidden);
-static cl::opt<unsigned> InputDbgValueLimit(
- "livedebugvalues-input-dbg-value-limit",
- cl::desc(
- "Maximum input DBG_VALUE insts supported by debug range extension"),
- cl::init(50000), cl::Hidden);
-
/// If \p Op is a stack or frame register return true, otherwise return false.
/// This is used to avoid basing the debug entry values on the registers, since
/// we do not support it at the moment.
@@ -296,6 +285,8 @@ private:
LexicalScopes LS;
VarLocSet::Allocator Alloc;
+ const MachineInstr *LastNonDbgMI;
+
enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
using FragmentInfo = DIExpression::FragmentInfo;
@@ -555,7 +546,6 @@ private:
EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg()
: Register(Loc.RegNo),
false));
- MOs.back().setIsDebug();
break;
case MachineLocKind::SpillLocKind: {
// Spills are indirect DBG_VALUEs, with a base register and offset.
@@ -565,9 +555,10 @@ private:
unsigned Base = Loc.SpillLocation.SpillBase;
auto *TRI = MF.getSubtarget().getRegisterInfo();
if (MI.isNonListDebugValue()) {
- DIExpr =
- TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset,
- Loc.SpillLocation.SpillOffset);
+ auto Deref = Indirect ? DIExpression::DerefAfter : 0;
+ DIExpr = TRI->prependOffsetExpression(
+ DIExpr, DIExpression::ApplyOffset | Deref,
+ Loc.SpillLocation.SpillOffset);
Indirect = true;
} else {
SmallVector<uint64_t, 4> Ops;
@@ -576,7 +567,6 @@ private:
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I);
}
MOs.push_back(MachineOperand::CreateReg(Base, false));
- MOs.back().setIsDebug();
break;
}
case MachineLocKind::ImmediateKind: {
@@ -626,7 +616,7 @@ private:
unsigned getRegIdx(Register Reg) const {
for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
if (Locs[Idx].Kind == MachineLocKind::RegisterKind &&
- Locs[Idx].Value.RegNo == Reg)
+ Register{static_cast<unsigned>(Locs[Idx].Value.RegNo)} == Reg)
return Idx;
llvm_unreachable("Could not find given Reg in Locs");
}
@@ -635,7 +625,7 @@ private:
/// add each of them to \p Regs and return true.
bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {
bool AnyRegs = false;
- for (auto Loc : Locs)
+ for (const auto &Loc : Locs)
if (Loc.Kind == MachineLocKind::RegisterKind) {
Regs.push_back(Loc.Value.RegNo);
AnyRegs = true;
@@ -801,6 +791,10 @@ private:
LocIndex LocationID; ///< Location number for the transfer dest.
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
+ // Types for recording Entry Var Locations emitted by a single MachineInstr,
+ // as well as recording the MachineInstr which last defined a register.
+ using InstToEntryLocMap = std::multimap<const MachineInstr *, LocIndex>;
+ using RegDefToInstMap = DenseMap<Register, MachineInstr *>;
// Types for recording sets of variable fragments that overlap. For a given
// local variable, we record all other fragments of that variable that could
@@ -974,13 +968,22 @@ private:
Register NewReg = Register());
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs);
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
- bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, const VarLoc &EntryVL);
+ void cleanupEntryValueTransfers(const MachineInstr *MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers);
+ void removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
VarLocsInRange &KillSet);
void recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
@@ -988,12 +991,16 @@ private:
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
OverlapMap &OLapMap);
@@ -1007,7 +1014,9 @@ private:
/// had their instruction creation deferred.
void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
- bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
public:
/// Default construct and initialize the pass.
@@ -1225,62 +1234,100 @@ VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return {Reg, Offset};
}
+/// Clean up \p EntryValTransfers for \p TRInst by removing the transfer that
+/// uses the to-be-deleted \p EntryVL.
+void VarLocBasedLDV::cleanupEntryValueTransfers(
+ const MachineInstr *TRInst, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL, InstToEntryLocMap &EntryValTransfers) {
+ if (EntryValTransfers.empty() || TRInst == nullptr)
+ return;
+
+ auto TransRange = EntryValTransfers.equal_range(TRInst);
+ for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
+ const VarLoc &EmittedEV = VarLocIDs[TDPair.second];
+ if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) ==
+ std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo,
+ EmittedEV.Expr)) {
+ OpenRanges.erase(EmittedEV);
+ EntryValTransfers.erase(TRInst);
+ break;
+ }
+ }
+}
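
For readers unfamiliar with the InstToEntryLocMap introduced earlier in this patch: it is a std::multimap keyed by instruction, and the loop above visits every transfer attached to one instruction via equal_range. A tiny standalone sketch of that access pattern, with ints standing in for MachineInstr* and LocIndex:

#include <cassert>
#include <map>

int main() {
  std::multimap<int, int> EntryValTransfers;
  EntryValTransfers.insert({/*Inst=*/7, /*Loc=*/100});
  EntryValTransfers.insert({/*Inst=*/7, /*Loc=*/101});
  EntryValTransfers.insert({/*Inst=*/9, /*Loc=*/102});

  // Walk only the transfers attached to instruction 7.
  auto Range = EntryValTransfers.equal_range(7);
  unsigned Count = 0;
  for (auto It = Range.first; It != Range.second; ++It)
    ++Count;
  assert(Count == 2);
  return 0;
}
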
+
/// Try to salvage the debug entry value if we encounter a new debug value
/// describing the same parameter, otherwise stop tracking the value. Return
-/// true if we should stop tracking the entry value, otherwise return false.
-bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs,
- const VarLoc &EntryVL) {
+/// after cleaning up any Entry Value Transfers that were emitted for the
+/// now-invalid entry value.
+void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
// Skip the DBG_VALUE which is the debug entry value itself.
- if (MI.isIdenticalTo(EntryVL.MI))
- return false;
+ if (&MI == &EntryVL.MI)
+ return;
// If the parameter's location is not register location, we can not track
- // the entry value any more. In addition, if the debug expression from the
- // DBG_VALUE is not empty, we can assume the parameter's value has changed
- // indicating that we should stop tracking its entry value as well.
- if (!MI.getDebugOperand(0).isReg() ||
- MI.getDebugExpression()->getNumElements() != 0)
- return true;
-
- // If the DBG_VALUE comes from a copy instruction that copies the entry value,
- // it means the parameter's value has not changed and we should be able to use
- // its entry value.
+ // the entry value any more. There is no TransferInst defining the register,
+ // so no Entry Value Transfers can have been emitted yet.
+ if (!MI.getDebugOperand(0).isReg())
+ return;
+
+ // Try to get the non-debug instruction responsible for the DBG_VALUE.
+ const MachineInstr *TransferInst = nullptr;
Register Reg = MI.getDebugOperand(0).getReg();
- auto I = std::next(MI.getReverseIterator());
- const MachineOperand *SrcRegOp, *DestRegOp;
- if (I != MI.getParent()->rend()) {
+ if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end())
+ TransferInst = RegSetInstrs.find(Reg)->second;
+
+ // Handle the case of a parameter's DBG_VALUE at the start of the entry MBB.
+ if (!TransferInst && !LastNonDbgMI && MI.getParent()->isEntryBlock())
+ return;
+ // If the debug expression from the DBG_VALUE is not empty, we can assume the
+ // parameter's value has changed, indicating that we should stop tracking its
+ // entry value as well.
+ if (MI.getDebugExpression()->getNumElements() == 0 && TransferInst) {
+ // If the DBG_VALUE comes from a copy instruction that copies the entry
+ // value, it means the parameter's value has not changed and we should be
+ // able to use its entry value.
// TODO: Try to keep tracking of an entry value if we encounter a propagated
// DBG_VALUE describing the copy of the entry value. (Propagated entry value
// does not indicate the parameter modification.)
- auto DestSrc = TII->isCopyInstr(*I);
- if (!DestSrc)
- return true;
-
- SrcRegOp = DestSrc->Source;
- DestRegOp = DestSrc->Destination;
- if (Reg != DestRegOp->getReg())
- return true;
-
- for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
- const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
- if (VL.isEntryValueCopyBackupReg(Reg) &&
- // Entry Values should not be variadic.
- VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
- return false;
+ auto DestSrc = TII->isCopyInstr(*TransferInst);
+ if (DestSrc) {
+ const MachineOperand *SrcRegOp, *DestRegOp;
+ SrcRegOp = DestSrc->Source;
+ DestRegOp = DestSrc->Destination;
+ if (Reg == DestRegOp->getReg()) {
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
+ if (VL.isEntryValueCopyBackupReg(Reg) &&
+ // Entry Values should not be variadic.
+ VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
+ return;
+ }
+ }
}
}
- return true;
+ LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+ MI.print(dbgs(), /*IsStandalone*/ false,
+ /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+ /*AddNewLine*/ true, TII));
+ cleanupEntryValueTransfers(TransferInst, OpenRanges, VarLocIDs, EntryVL,
+ EntryValTransfers);
+ OpenRanges.erase(EntryVL);
}
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs) {
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
if (!MI.isDebugValue())
return;
const DILocalVariable *Var = MI.getDebugVariable();
@@ -1297,13 +1344,8 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
if (Var->isParameter() && EntryValBackupID) {
const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()];
- if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
- LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
- MI.print(dbgs(), /*IsStandalone*/ false,
- /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
- /*AddNewLine*/ true, TII));
- OpenRanges.erase(EntryVL);
- }
+ removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL, EntryValTransfers,
+ RegSetInstrs);
}
if (all_of(MI.debug_operands(), [](const MachineOperand &MO) {
@@ -1351,7 +1393,7 @@ void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
- TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
VarLocsInRange &KillSet) {
// Do not insert entry value locations after a terminator.
if (MI.isTerminator())
@@ -1377,7 +1419,9 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,
EntryVL.Locs[0].Value.RegNo);
LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
- Transfers.push_back({&MI, EntryValueIDs.back()});
+ assert(EntryValueIDs.size() == 1 &&
+ "EntryValue loc should not be variadic");
+ EntryValTransfers.insert({&MI, EntryValueIDs.back()});
OpenRanges.insert(EntryValueIDs, EntryLoc);
}
}
@@ -1454,9 +1498,11 @@ void VarLocBasedLDV::insertTransferDebugPair(
}
/// A definition of a register may mark the end of a range.
-void VarLocBasedLDV::transferRegisterDef(
- MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
- TransferMap &Transfers) {
+void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
// Meta Instructions do not affect the debug liveness of any register they
// define.
@@ -1479,6 +1525,8 @@ void VarLocBasedLDV::transferRegisterDef(
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
DeadRegs.insert(*RAI);
+ RegSetInstrs.erase(MO.getReg());
+ RegSetInstrs.insert({MO.getReg(), &MI});
} else if (MO.isRegMask()) {
RegMasks.push_back(MO.getRegMask());
}
@@ -1505,6 +1553,10 @@ void VarLocBasedLDV::transferRegisterDef(
});
if (AnyRegMaskKillsReg)
DeadRegs.insert(Reg);
+ if (AnyRegMaskKillsReg) {
+ RegSetInstrs.erase(Reg);
+ RegSetInstrs.insert({Reg, &MI});
+ }
}
}
@@ -1518,7 +1570,7 @@ void VarLocBasedLDV::transferRegisterDef(
if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.ShouldEmitDebugEntryValues())
- emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
+ emitEntryValues(MI, OpenRanges, VarLocIDs, EntryValTransfers, KillSet);
}
}
@@ -1851,9 +1903,15 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
/// This routine creates OpenRanges.
void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers) {
- transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+ if (!MI.isDebugInstr())
+ LastNonDbgMI = &MI;
+ transferDebugValue(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
@@ -2048,7 +2106,11 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
-bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
+ (void)DomTree;
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
if (!MF.getFunction().getSubprogram())
@@ -2079,6 +2141,10 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
// spills, copies and restores).
+ // Map responsible MI to attached Transfer emitted from Backup Entry Value.
+ InstToEntryLocMap EntryValTransfers;
+ // Map a Register to the last MI which clobbered it.
+ RegDefToInstMap RegSetInstrs;
VarToFragments SeenFragments;
@@ -2141,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
for (auto &MI : MBB)
if (MI.isDebugValue())
++NumInputDbgValues;
- if (NumInputDbgValues > InputDbgValueLimit) {
+ if (NumInputDbgValues > InputDbgValLimit) {
LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()
<< " has " << RPONumber << " basic blocks and "
<< NumInputDbgValues
@@ -2175,8 +2241,11 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
// operate with registers that correspond to user variables.
// First load any pending inlocs.
OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs);
+ LastNonDbgMI = nullptr;
+ RegSetInstrs.clear();
for (auto &MI : *MBB)
- process(MI, OpenRanges, VarLocIDs, Transfers);
+ process(MI, OpenRanges, VarLocIDs, Transfers, EntryValTransfers,
+ RegSetInstrs);
OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
@@ -2210,6 +2279,18 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
}
Transfers.clear();
+ // Add DBG_VALUEs created using Backup Entry Value location.
+ for (auto &TR : EntryValTransfers) {
+ MachineInstr *TRInst = const_cast<MachineInstr *>(TR.first);
+ assert(!TRInst->isTerminator() &&
+ "Cannot insert DBG_VALUE after terminator");
+ MachineBasicBlock *MBB = TRInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.second];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TRInst->getIterator(), MI);
+ }
+ EntryValTransfers.clear();
+
// Deferred inlocs will not have had any DBG_VALUE insts created; do
// that now.
flushPendingLocs(InLocs, VarLocIDs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 54058a547928..dcd546f9c6db 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -417,7 +417,7 @@ public:
void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,
bool IsList, const DIExpression &Expr) {
SmallVector<unsigned> Locs;
- for (MachineOperand Op : LocMOs)
+ for (const MachineOperand &Op : LocMOs)
Locs.push_back(getLocationNo(Op));
DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);
// Add a singular (Idx,Idx) -> value mapping.
@@ -1294,13 +1294,9 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {
static void removeDebugInstrs(MachineFunction &mf) {
for (MachineBasicBlock &MBB : mf) {
- for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
- if (!MBBI->isDebugInstr()) {
- ++MBBI;
- continue;
- }
- MBBI = MBB.erase(MBBI);
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ if (MI.isDebugInstr())
+ MBB.erase(&MI);
}
}
@@ -1314,12 +1310,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
// Have we been asked to track variable locations using instruction
// referencing?
- bool InstrRef = false;
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (TPC) {
- auto &TM = TPC->getTM<TargetMachine>();
- InstrRef = TM.Options.ValueTrackingVariableLocations;
- }
+ bool InstrRef = mf.useDebugInstrRef();
if (!pImpl)
pImpl = new LDVImpl(this);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 1eed0ec5bbbe..9ded0fb6ae0a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -592,21 +592,10 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
- if (RemoveDeadValNo) {
- // Check if val# is dead.
- bool isDead = true;
- for (const_iterator II = begin(), EE = end(); II != EE; ++II)
- if (II != I && II->valno == ValNo) {
- isDead = false;
- break;
- }
- if (isDead) {
- // Now that ValNo is dead, remove it.
- markValNoForDeletion(ValNo);
- }
- }
-
segments.erase(I); // Removed the whole Segment.
+
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
} else
I->start = End;
return;
@@ -627,13 +616,25 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
segments.insert(std::next(I), Segment(End, OldEnd, ValNo));
}
+LiveRange::iterator LiveRange::removeSegment(iterator I, bool RemoveDeadValNo) {
+ VNInfo *ValNo = I->valno;
+ I = segments.erase(I);
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
+ return I;
+}
+
+void LiveRange::removeValNoIfDead(VNInfo *ValNo) {
+ if (none_of(*this, [=](const Segment &S) { return S.valno == ValNo; }))
+ markValNoForDeletion(ValNo);
+}
+
/// removeValNo - Remove all the segments defined by the specified value#.
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- segments.erase(remove_if(*this, [ValNo](const Segment &S) {
- return S.valno == ValNo;
- }), end());
+ llvm::erase_if(segments,
+ [ValNo](const Segment &S) { return S.valno == ValNo; });
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
@@ -1019,7 +1020,7 @@ void LiveRange::print(raw_ostream &OS) const {
// Print value number info.
if (getNumValNums()) {
- OS << " ";
+ OS << ' ';
unsigned vnum = 0;
for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
++i, ++vnum) {
@@ -1038,8 +1039,8 @@ void LiveRange::print(raw_ostream &OS) const {
}
void LiveInterval::SubRange::print(raw_ostream &OS) const {
- OS << " L" << PrintLaneMask(LaneMask) << ' '
- << static_cast<const LiveRange&>(*this);
+ OS << " L" << PrintLaneMask(LaneMask) << ' '
+ << static_cast<const LiveRange &>(*this);
}
void LiveInterval::print(raw_ostream &OS) const {
@@ -1048,7 +1049,7 @@ void LiveInterval::print(raw_ostream &OS) const {
// Print subranges
for (const SubRange &SR : subranges())
OS << SR;
- OS << " weight:" << Weight;
+ OS << " weight:" << Weight;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index dfa523d4bf41..50b31e1eb247 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
- return is_contained(*InterferingVRegs, VirtReg);
+ return is_contained(InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
@@ -124,14 +124,11 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
// 3. Iterators left at the last seen intersection.
//
-unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs) {
- if (!InterferingVRegs)
- InterferingVRegs.emplace();
-
+unsigned
+LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {
// Fast path return if we already have the desired information.
- if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
- return InterferingVRegs->size();
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
// Set up iterators on the first call.
if (!CheckedFirstInterference) {
@@ -160,14 +157,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
- InterferingVRegs->push_back(VReg);
- if (InterferingVRegs->size() >= MaxInterferingRegs)
- return InterferingVRegs->size();
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
}
// This LiveUnion segment is no longer interesting.
if (!(++LiveUnionI).valid()) {
SeenAllInterferences = true;
- return InterferingVRegs->size();
+ return InterferingVRegs.size();
}
}
@@ -188,7 +185,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
- return InterferingVRegs->size();
+ return InterferingVRegs.size();
}
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 23036c2b115f..2f97386b6d18 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1571,15 +1571,14 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(EndIdx);
SlotIndex lastUseIdx;
- if (LII == LR.begin()) {
- // This happens when the function is called for a subregister that only
- // occurs _after_ the range that is to be repaired.
- return;
- }
- if (LII != LR.end() && LII->start < EndIdx)
+ if (LII != LR.end() && LII->start < EndIdx) {
lastUseIdx = LII->end;
- else
+ } else if (LII == LR.begin()) {
+ // We may not have a live range at all if this is a subregister untouched
+ // between \p Begin and \p End.
+ } else {
--LII;
+ }
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
@@ -1593,10 +1592,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
// FIXME: This doesn't currently handle early-clobber or multiple removed
// defs inside of the region to repair.
- for (MachineInstr::mop_iterator OI = MI.operands_begin(),
- OE = MI.operands_end();
- OI != OE; ++OI) {
- const MachineOperand &MO = *OI;
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() != Reg)
continue;
@@ -1608,17 +1604,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
if (MO.isDef()) {
if (!isStartValid) {
if (LII->end.isDead()) {
- SlotIndex prevStart;
+ LII = LR.removeSegment(LII, true);
if (LII != LR.begin())
- prevStart = std::prev(LII)->start;
-
- // FIXME: This could be more efficient if there was a
- // removeSegment method that returned an iterator.
- LR.removeSegment(*LII, true);
- if (prevStart.isValid())
- LII = LR.find(prevStart);
- else
- LII = LR.begin();
+ --LII;
} else {
LII->start = instrIdx.getRegSlot();
LII->valno->def = instrIdx.getRegSlot();
@@ -1656,6 +1644,10 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
}
}
}
+
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ if (!isStartValid && LII->end.isDead())
+ LR.removeSegment(*LII, true);
}
void
@@ -1678,22 +1670,33 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
Indexes->repairIndexesInRange(MBB, Begin, End);
+ // Make sure a live interval exists for all register operands in the range.
+ SmallVector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end());
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
if (MI.isDebugOrPseudoInstr())
continue;
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) &&
- !hasInterval(MOI->getReg())) {
- createAndComputeVirtRegInterval(MOI->getReg());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ Register Reg = MO.getReg();
+ // If the new instructions refer to subregs but the old instructions did
+ // not, throw away any old live interval so it will be recomputed with
+ // subranges.
+ if (MO.getSubReg() && hasInterval(Reg) &&
+ !getInterval(Reg).hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ removeInterval(Reg);
+ if (!hasInterval(Reg)) {
+ createAndComputeVirtRegInterval(Reg);
+ // Don't bother to repair a freshly calculated live interval.
+ erase_value(RegsToRepair, Reg);
+ }
}
}
}
- for (Register Reg : OrigRegs) {
+ for (Register Reg : RegsToRepair) {
if (!Reg.isVirtual())
continue;
@@ -1704,6 +1707,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (LiveInterval::SubRange &S : LI.subranges())
repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask);
+ LI.removeEmptySubRanges();
repairOldRegInRange(Begin, End, EndIdx, LI, Reg);
}
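
Editorial note: the repairOldRegInRange hunk above relies on the LiveRange::removeSegment overload that takes an iterator and returns the iterator following the erased segment, so the code no longer has to remember prevStart and re-run LR.find(). A minimal standard-C++ sketch of that erase-and-continue idiom; the container and values are illustrative, not the LiveRange API.

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Segments = {10, 20, 30, 40};
  auto LII = Segments.begin() + 2;   // pretend this segment is dead
  LII = Segments.erase(LII);         // erase returns the next position
  if (LII != Segments.begin())
    --LII;                           // mirrors the `--LII;` in the hunk
  std::printf("resumed at segment %d\n", *LII);   // prints 20
  return 0;
}
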
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
index c0c7848139e4..d4848f16dcf2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -81,22 +81,24 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg() && !O->isDebug()) {
+ if (O->isReg()) {
+ if (O->isDebug())
+ continue;
Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
// handle them.
Clobbers.push_back(std::make_pair(Reg, &*O));
} else {
- if (!O->isKill())
- continue;
assert(O->isUse());
- removeReg(Reg);
+ if (O->isKill())
+ removeReg(Reg);
}
- } else if (O->isRegMask())
+ } else if (O->isRegMask()) {
removeRegsInMask(*O, &Clobbers);
+ }
}
// Add defs to the set.
@@ -250,7 +252,7 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ for (const MachineInstr &MI : llvm::reverse(MBB))
LiveRegs.stepBackward(MI);
}
@@ -287,7 +289,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
// Recompute dead flags.
for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
if (!MO->isReg() || !MO->isDef() || MO->isDebug())
@@ -296,7 +298,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(Register::isPhysicalRegister(Reg));
+ assert(Reg.isPhysical());
bool IsNotLive = LiveRegs.available(MRI, Reg);
@@ -325,7 +327,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(Register::isPhysicalRegister(Reg));
+ assert(Reg.isPhysical());
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsKill(IsNotLive);
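
Editorial note: the loops above now use llvm::reverse(MBB) instead of make_range(MBB.rbegin(), MBB.rend()); the helper lives in llvm/ADT/STLExtras.h and adapts anything with rbegin()/rend(). A minimal sketch with a standard container standing in for the basic block (illustrative only):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Insts = {1, 2, 3};
  // Walk the "block" bottom-up, the order stepBackward() needs.
  for (int I : llvm::reverse(Insts))
    std::printf("%d\n", I);          // prints 3 2 1
  return 0;
}
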
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 64a2dd275643..d91ff734ad8f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -107,7 +107,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
- UseIdx = UseIdx.getRegSlot(true);
+ UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
@@ -305,17 +305,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
}
+ bool HasLiveVRegUses = false;
+
// Check for live intervals that may shrink
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (!MOI->isReg())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MOI->getReg();
+ Register Reg = MO.getReg();
if (!Register::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+ if (Reg && MO.readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
- else if (MOI->isDef())
+ else if (MO.isDef())
LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
continue;
}
@@ -325,12 +326,14 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) ||
- (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI))))
+ if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
ToShrink.insert(&LI);
+ else if (MO.readsReg())
+ HasLiveVRegUses = true;
// Remove defined value.
- if (MOI->isDef()) {
+ if (MO.isDef()) {
if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
TheDelegate->LRE_WillShrinkVirtReg(LI.reg());
LIS.removeVRegDefAt(LI, Idx);
@@ -362,7 +365,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// the inst for remat of other siblings. The inst is saved in
// LiveRangeEdit::DeadRemats and will be deleted after all the
// allocations of the func are done.
- if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
+ // However, immediately delete instructions which have unshrunk virtual
+ // register uses. That may provoke RA to split an interval at the KILL
+ // and later result in an invalid live segment end.
+ if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
+ TII.isTriviallyReMaterializable(*MI, AA)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
@@ -405,8 +412,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
break;
// Shrink just one live interval. Then delete new dead defs.
- LiveInterval *LI = ToShrink.back();
- ToShrink.pop_back();
+ LiveInterval *LI = ToShrink.pop_back_val();
if (foldAsLoad(LI, Dead))
continue;
unsigned VReg = LI->reg();
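
Editorial note: this hunk, and several below, replace the back()/pop_back() pair with SmallVector::pop_back_val(), which removes and returns the last element in one call. A minimal sketch of the worklist idiom; the element type and values are illustrative.

#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::SmallVector<int, 4> Worklist = {10, 20, 30};
  while (!Worklist.empty()) {
    int Item = Worklist.pop_back_val();   // back() + pop_back() in one step
    std::printf("visiting %d\n", Item);
  }
  return 0;
}
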
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 7181dbc9c870..51ba4b7e53eb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -119,8 +119,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
while (!WorkList.empty()) {
- MachineBasicBlock *Pred = WorkList.back();
- WorkList.pop_back();
+ MachineBasicBlock *Pred = WorkList.pop_back_val();
MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
}
}
@@ -484,8 +483,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
- Register Reg = Defs.back();
- Defs.pop_back();
+ Register Reg = Defs.pop_back_val();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
@@ -671,6 +669,86 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
return false;
}
+void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
+ assert(Reg.isVirtual());
+
+ VarInfo &VI = getVarInfo(Reg);
+ VI.AliveBlocks.clear();
+ VI.Kills.clear();
+
+ MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg);
+ MachineBasicBlock &DefBB = *DefMI.getParent();
+
+ // Handle the case where all uses have been removed.
+ if (MRI->use_nodbg_empty(Reg)) {
+ VI.Kills.push_back(&DefMI);
+ DefMI.addRegisterDead(Reg, nullptr);
+ return;
+ }
+ DefMI.clearRegisterDeads(Reg);
+
+ // Initialize a worklist of BBs that Reg is live-to-end of. (Here
+ // "live-to-end" means Reg is live at the end of a block even if it is only
+ // live because of phi uses in a successor. This is different from isLiveOut()
+ // which does not consider phi uses.)
+ SmallVector<MachineBasicBlock *> LiveToEndBlocks;
+ SparseBitVector<> UseBlocks;
+ for (auto &UseMO : MRI->use_nodbg_operands(Reg)) {
+ UseMO.setIsKill(false);
+ MachineInstr &UseMI = *UseMO.getParent();
+ MachineBasicBlock &UseBB = *UseMI.getParent();
+ UseBlocks.set(UseBB.getNumber());
+ if (UseMI.isPHI()) {
+ // If Reg is used in a phi then it is live-to-end of the corresponding
+ // predecessor.
+ unsigned Idx = UseMI.getOperandNo(&UseMO);
+ LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB());
+ } else if (&UseBB == &DefBB) {
+ // A non-phi use in the same BB as the single def must come after the def.
+ } else {
+ // Otherwise Reg must be live-to-end of all predecessors.
+ LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end());
+ }
+ }
+
+ // Iterate over the worklist adding blocks to AliveBlocks.
+ bool LiveToEndOfDefBB = false;
+ while (!LiveToEndBlocks.empty()) {
+ MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val();
+ if (&BB == &DefBB) {
+ LiveToEndOfDefBB = true;
+ continue;
+ }
+ if (VI.AliveBlocks.test(BB.getNumber()))
+ continue;
+ VI.AliveBlocks.set(BB.getNumber());
+ LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end());
+ }
+
+ // Recompute kill flags. For each block in which Reg is used but is not
+ // live-through, find the last instruction that uses Reg. Ignore phi nodes
+ // because they should not be included in Kills.
+ for (unsigned UseBBNum : UseBlocks) {
+ if (VI.AliveBlocks.test(UseBBNum))
+ continue;
+ MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum);
+ if (&UseBB == &DefBB && LiveToEndOfDefBB)
+ continue;
+ for (auto &MI : reverse(UseBB)) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+ if (MI.isPHI())
+ break;
+ if (MI.readsRegister(Reg)) {
+ assert(!MI.killsRegister(Reg));
+ MI.addRegisterKilled(Reg, nullptr);
+ VI.Kills.push_back(&MI);
+ break;
+ }
+ }
+ }
+}
+
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
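
Editorial note: the new recomputeForSingleDefVirtReg seeds a worklist with the blocks the register is live-to-end of and then floods backwards through predecessors until it reaches the defining block, marking everything visited as live-through. A self-contained sketch of that backward flood fill over a toy CFG; the block numbering and predecessor table are illustrative, not the MachineBasicBlock API.

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Toy CFG, block -> predecessors: def in bb.0, use in bb.3.
  std::vector<std::vector<int>> Preds = {{}, {0}, {1}, {1, 2}};
  const int DefBB = 0;
  std::vector<int> Worklist = Preds[3];   // the use block's predecessors
  std::set<int> AliveBlocks;
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (BB == DefBB || !AliveBlocks.insert(BB).second)
      continue;                           // stop at the def or if already seen
    Worklist.insert(Worklist.end(), Preds[BB].begin(), Preds[BB].end());
  }
  for (int BB : AliveBlocks)
    std::printf("live-through: bb.%d\n", BB);  // bb.1 and bb.2
  return 0;
}
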
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp
index 9490dfc40a82..0d400253c652 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp
@@ -39,8 +39,7 @@ LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
bool Primary = true;
Workqueue.push_back(MBB);
while (!Workqueue.empty()) {
- MachineBasicBlock *ActiveMBB = &*Workqueue.back();
- Workqueue.pop_back();
+ MachineBasicBlock *ActiveMBB = Workqueue.pop_back_val();
bool Done = isBlockDone(ActiveMBB);
MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));
for (MachineBasicBlock *Succ : ActiveMBB->successors()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index 62e9c6b629d3..dce64ab9f5ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -52,6 +52,16 @@ MVT llvm::getMVTForLLT(LLT Ty) {
Ty.getNumElements());
}
+EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
+ LLVMContext &Ctx) {
+ if (Ty.isVector()) {
+ EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
+ return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
+ }
+
+ return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
+}
+
LLT llvm::getLLTForMVT(MVT Ty) {
if (!Ty.isVector())
return LLT::scalar(Ty.getSizeInBits());
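
Editorial note: getApproximateEVTForLLT maps an LLT back to an EVT by treating any scalar as a plain integer of the same bit width and recursing element-wise for vectors. A hedged usage sketch, assuming the declarations live in the usual CodeGen headers; the empty DataLayout string and the helper name are illustrative.

#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"

llvm::EVT approximate24BitType(llvm::LLVMContext &Ctx) {
  llvm::DataLayout DL("");                      // illustrative layout
  llvm::LLT S24 = llvm::LLT::scalar(24);
  // A 24-bit scalar comes back as the (non-simple) integer type i24.
  return llvm::getApproximateEVTForLLT(S24, DL, Ctx);
}
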
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 8ef6aca602a1..3ec8c627f131 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -38,10 +38,6 @@
using namespace llvm;
-namespace llvm {
-extern char &MIRCanonicalizerID;
-} // namespace llvm
-
#define DEBUG_TYPE "mir-canonicalizer"
static cl::opt<unsigned>
@@ -332,8 +328,8 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
continue;
std::vector<MachineOperand *> Uses;
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
- Uses.push_back(&*UI);
+ for (MachineOperand &MO : MRI.use_operands(Dst))
+ Uses.push_back(&MO);
for (auto *MO : Uses)
MO->setReg(Src);
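
Editorial note: the rewritten loop above still snapshots the use operands into a vector before calling setReg, because rewriting a use moves it off Dst's use list and mutating while walking MRI.use_operands(Dst) directly could invalidate the iteration. A standard-C++ analogy of the snapshot-then-mutate pattern; the containers are illustrative.

#include <cstdio>
#include <list>
#include <vector>

int main() {
  std::list<int> Uses = {1, 2, 3};      // stands in for a register use list
  std::vector<int *> Snapshot;          // collect first ...
  for (int &U : Uses)
    Snapshot.push_back(&U);
  for (int *U : Snapshot)               // ... then rewrite, like MO->setReg(Src)
    *U = 0;
  for (int U : Uses)
    std::printf("%d\n", U);             // prints 0 0 0
  return 0;
}
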
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 87fde7d39a60..0ca820f160aa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -261,6 +261,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("liveout", MIToken::kw_liveout)
.Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("inlineasm-br-indirect-target",
+ MIToken::kw_inlineasm_br_indirect_target)
.Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
.Case("liveins", MIToken::kw_liveins)
.Case("successors", MIToken::kw_successors)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index 68425b41c3fb..70d17f819ce3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -116,6 +116,7 @@ struct MIToken {
kw_liveout,
kw_address_taken,
kw_landing_pad,
+ kw_inlineasm_br_indirect_target,
kw_ehfunclet_entry,
kw_liveins,
kw_successors,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 34e1f9225d42..1a04e1ca56a9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -498,7 +498,7 @@ public:
MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
- bool parseAlignment(unsigned &Alignment);
+ bool parseAlignment(uint64_t &Alignment);
bool parseAddrspace(unsigned &Addrspace);
bool parseSectionID(Optional<MBBSectionID> &SID);
bool parseOperandsOffset(MachineOperand &Op);
@@ -674,9 +674,10 @@ bool MIParser::parseBasicBlockDefinition(
lex();
bool HasAddressTaken = false;
bool IsLandingPad = false;
+ bool IsInlineAsmBrIndirectTarget = false;
bool IsEHFuncletEntry = false;
Optional<MBBSectionID> SectionID;
- unsigned Alignment = 0;
+ uint64_t Alignment = 0;
BasicBlock *BB = nullptr;
if (consumeIfPresent(MIToken::lparen)) {
do {
@@ -690,6 +691,10 @@ bool MIParser::parseBasicBlockDefinition(
IsLandingPad = true;
lex();
break;
+ case MIToken::kw_inlineasm_br_indirect_target:
+ IsInlineAsmBrIndirectTarget = true;
+ lex();
+ break;
case MIToken::kw_ehfunclet_entry:
IsEHFuncletEntry = true;
lex();
@@ -737,6 +742,7 @@ bool MIParser::parseBasicBlockDefinition(
if (HasAddressTaken)
MBB->setHasAddressTaken();
MBB->setIsEHPad(IsLandingPad);
+ MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
if (SectionID.hasValue()) {
MBB->setSectionID(SectionID.getValue());
@@ -1011,10 +1017,6 @@ bool MIParser::parse(MachineInstr *&MI) {
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
- if ((OpCode == TargetOpcode::DBG_VALUE ||
- OpCode == TargetOpcode::DBG_VALUE_LIST) &&
- MO.isReg())
- MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -2898,16 +2900,16 @@ bool MIParser::parseOffset(int64_t &Offset) {
return false;
}
-bool MIParser::parseAlignment(unsigned &Alignment) {
+bool MIParser::parseAlignment(uint64_t &Alignment) {
assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
lex();
if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
return error("expected an integer literal after 'align'");
- if (getUnsigned(Alignment))
+ if (getUint64(Alignment))
return true;
lex();
- if (!isPowerOf2_32(Alignment))
+ if (!isPowerOf2_64(Alignment))
return error("expected a power-of-2 literal after 'align'");
return false;
@@ -3261,7 +3263,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment =
+ uint64_t BaseAlignment =
(Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
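
Editorial note: parseAlignment and the memory-operand base alignment now use uint64_t and isPowerOf2_64, so alignments of 2^32 and above survive parsing. A minimal sketch of the widened check using the MathExtras helper; the function name and diagnostic text are illustrative.

#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <cstdio>

static bool checkAlignment(uint64_t Alignment) {
  // Same validation as the parser, evaluated in 64 bits instead of 32.
  if (!llvm::isPowerOf2_64(Alignment)) {
    std::fprintf(stderr, "expected a power-of-2 literal after 'align'\n");
    return false;
  }
  return true;
}

int main() {
  return checkAlignment(uint64_t(1) << 33) ? 0 : 1;   // 8 GiB alignment passes
}
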
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index d77104752880..6221b5929301 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -454,6 +454,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
if (YamlMF.FailedISel)
MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (YamlMF.FailsVerification)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index 2a78bb62762a..f1369396e37f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -217,6 +217,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::Selected);
YamlMF.FailedISel = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel);
+ YamlMF.FailsVerification = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
MachineModuleSlotTracker MST(&MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
new file mode 100644
index 000000000000..90ecc6fc68fc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -0,0 +1,343 @@
+//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MIRSampleProfile loader, mainly
+// for flow sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRSampleProfile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::sampleprofutil;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "fs-profile-loader"
+
+static cl::opt<bool> ShowFSBranchProb(
+ "show-fs-branchprob", cl::Hidden, cl::init(false),
+ cl::desc("Print setting flow sensitive branch probabilities"));
+static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
+ "fs-profile-debug-prob-diff-threshold", cl::init(10),
+ cl::desc("Only show debug message if the branch probility is greater than "
+ "this value (in percentage)."));
+
+static cl::opt<unsigned> FSProfileDebugBWThreshold(
+ "fs-profile-debug-bw-threshold", cl::init(10000),
+ cl::desc("Only show debug message if the source branch weight is greater "
+ " than this value."));
+
+static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI before MIR loader"));
+static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI after MIR loader"));
+
+char MIRProfileLoaderPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
+ "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
+
+FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
+ std::string RemappingFile,
+ FSDiscriminatorPass P) {
+ return new MIRProfileLoaderPass(File, RemappingFile, P);
+}
+
+namespace llvm {
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi={none | fraction | integer | count}
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+namespace afdo_detail {
+template <> struct IRTraits<MachineBasicBlock> {
+ using InstructionT = MachineInstr;
+ using BasicBlockT = MachineBasicBlock;
+ using FunctionT = MachineFunction;
+ using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
+ using LoopT = MachineLoop;
+ using LoopInfoPtrT = MachineLoopInfo *;
+ using DominatorTreePtrT = MachineDominatorTree *;
+ using PostDominatorTreePtrT = MachinePostDominatorTree *;
+ using PostDominatorTreeT = MachinePostDominatorTree;
+ using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
+ using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
+ using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
+ static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
+ return GraphTraits<const MachineFunction *>::getEntryNode(F);
+ }
+ static PredRangeT getPredecessors(MachineBasicBlock *BB) {
+ return BB->predecessors();
+ }
+ static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
+ return BB->successors();
+ }
+};
+} // namespace afdo_detail
+
+class MIRProfileLoader final
+ : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+public:
+ void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
+ MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
+ MachineOptimizationRemarkEmitter *MORE) {
+ DT = MDT;
+ PDT = MPDT;
+ LI = MLI;
+ BFI = MBFI;
+ ORE = MORE;
+ }
+ void setFSPass(FSDiscriminatorPass Pass) {
+ P = Pass;
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+ assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+ }
+
+ MIRProfileLoader(StringRef Name, StringRef RemapName)
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
+ }
+
+ void setBranchProbs(MachineFunction &F);
+ bool runOnFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool isValid() const { return ProfileIsValid; }
+
+protected:
+ friend class SampleCoverageTracker;
+
+ /// Hold the information of the basic block frequency.
+ MachineBlockFrequencyInfo *BFI;
+
+ /// P is the sequence number in which this pass is called, starting from 1.
+ FSDiscriminatorPass P;
+
+ // LowBit in the FS discriminator used by this instance. Note the number is
+ // 0-based. The base discriminator uses bits 0 to 11.
+ unsigned LowBit;
+ // HighBit in the FS discriminator used by this instance. Note the number
+ // is 0-based.
+ unsigned HighBit;
+
+ bool ProfileIsValid = true;
+};
+
+template <>
+void SampleProfileLoaderBaseImpl<
+ MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+
+void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+ for (auto &BI : F) {
+ MachineBasicBlock *BB = &BI;
+ if (BB->succ_size() < 2)
+ continue;
+ const MachineBasicBlock *EC = EquivalenceClass[BB];
+ uint64_t BBWeight = BlockWeights[EC];
+ uint64_t SumEdgeWeight = 0;
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ Edge E = std::make_pair(BB, Succ);
+ SumEdgeWeight += EdgeWeights[E];
+ }
+
+ if (BBWeight != SumEdgeWeight) {
+ LLVM_DEBUG(dbgs() << "BBWeight is not equal to SumEdgeWeight: BBWeight="
+ << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+ << "\n");
+ BBWeight = SumEdgeWeight;
+ }
+ if (BBWeight == 0) {
+ LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ continue;
+ }
+
+#ifndef NDEBUG
+ uint64_t BBWeightOrig = BBWeight;
+#endif
+ uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+ uint32_t Factor = 1;
+ if (BBWeight > MaxWeight) {
+ Factor = BBWeight / MaxWeight + 1;
+ BBWeight /= Factor;
+ LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+ }
+
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ MachineBasicBlock *Succ = *SI;
+ Edge E = std::make_pair(BB, Succ);
+ uint64_t EdgeWeight = EdgeWeights[E];
+ EdgeWeight /= Factor;
+
+ assert(BBWeight >= EdgeWeight &&
+ "BBweight is larger than EdgeWeight -- should not happen.\n");
+
+ BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
+ BranchProbability NewProb(EdgeWeight, BBWeight);
+ if (OldProb == NewProb)
+ continue;
+ BB->setSuccProbability(SI, NewProb);
+#ifndef NDEBUG
+ if (!ShowFSBranchProb)
+ continue;
+ bool Show = false;
+ BranchProbability Diff;
+ if (OldProb > NewProb)
+ Diff = OldProb - NewProb;
+ else
+ Diff = NewProb - OldProb;
+ Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
+ Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
+
+ auto DIL = BB->findBranchDebugLoc();
+ auto SuccDIL = Succ->findBranchDebugLoc();
+ if (Show) {
+ dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+ << Succ->getNumber() << "): ";
+ if (DIL)
+ dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn();
+ if (SuccDIL)
+ dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
+ << ":" << SuccDIL->getColumn();
+ dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb
+ << "\n";
+ }
+#endif
+ }
+ }
+}
+
+bool MIRProfileLoader::doInitialization(Module &M) {
+ auto &Ctx = M.getContext();
+
+ auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
+ RemappingFilename);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+
+ Reader = std::move(ReaderOrErr.get());
+ Reader->setModule(&M);
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ Reader->getSummary();
+
+ return true;
+}
+
+bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+ Function &Func = MF.getFunction();
+ clearFunctionData(false);
+ Samples = Reader->getSamplesFor(Func);
+ if (!Samples || Samples->empty())
+ return false;
+
+ if (getFunctionLoc(MF) == 0)
+ return false;
+
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
+
+ // Set the new BPI, BFI.
+ setBranchProbs(MF);
+
+ return Changed;
+}
+
+} // namespace llvm
+
+MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName,
+ std::string RemappingFileName,
+ FSDiscriminatorPass P)
+ : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
+ MIRSampleLoader(
+ std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+ assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+}
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+ if (!MIRSampleLoader->isValid())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+ << MF.getFunction().getName() << "\n");
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MIRSampleLoader->setInitVals(
+ &getAnalysis<MachineDominatorTree>(),
+ &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+ MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+ MF.RenumberBlocks();
+ if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
+ }
+
+ bool Changed = MIRSampleLoader->runOnFunction(MF);
+
+ if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+ }
+
+ return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+ << "\n");
+
+ MIRSampleLoader->setFSPass(P);
+ return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
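
Editorial note: setBranchProbs above has to squeeze 64-bit sample weights into the 32-bit numerator and denominator that BranchProbability accepts, so it divides the block weight and every edge weight by the same factor. A worked standalone sketch of that scaling; the weights are made-up numbers.

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  uint64_t BBWeight = 10000000000ull;    // exceeds uint32_t
  uint64_t EdgeWeight = 7500000000ull;   // one outgoing edge's weight
  const uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
  uint32_t Factor = 1;
  if (BBWeight > MaxWeight)
    Factor = BBWeight / MaxWeight + 1;   // 10e9 / ~4.29e9 + 1 == 3
  BBWeight /= Factor;
  EdgeWeight /= Factor;
  // The ratio EdgeWeight / BBWeight (~0.75) is preserved after scaling.
  std::printf("factor=%u edge=%llu bb=%llu\n", Factor,
              (unsigned long long)EdgeWeight, (unsigned long long)BBWeight);
  return 0;
}
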
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c6914dcd0e54..23c511aaa056 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -134,9 +134,8 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::instr_iterator
- I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
- I->AddRegOperandsToUseLists(RegInfo);
+ for (MachineInstr &MI : N->instrs())
+ MI.AddRegOperandsToUseLists(RegInfo);
}
void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -281,8 +280,8 @@ MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
}
bool MachineBasicBlock::hasEHPadSuccessor() const {
- for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
- if ((*I)->isEHPad())
+ for (const MachineBasicBlock *Succ : successors())
+ if (Succ->isEHPad())
return true;
return false;
}
@@ -517,6 +516,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
os << "landing-pad";
hasAttributes = true;
}
+ if (isInlineAsmBrIndirectTarget()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "inlineasm-br-indirect-target";
+ hasAttributes = true;
+ }
if (isEHFuncletEntry()) {
os << (hasAttributes ? ", " : " (");
os << "ehfunclet-entry";
@@ -1037,17 +1041,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
MachineInstr *MI = &*I;
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || OI->getReg() == 0 ||
- !OI->isUse() || !OI->isKill() || OI->isUndef())
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
+ MO.isUndef())
continue;
- Register Reg = OI->getReg();
+ Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
- LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
- OI->setIsKill(false);
+ LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ MO.setIsKill(false);
}
}
}
@@ -1058,12 +1061,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
I != E; ++I) {
MachineInstr *MI = &*I;
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || OI->getReg() == 0)
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() == 0)
continue;
- Register Reg = OI->getReg();
+ Register Reg = MO.getReg();
if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
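
Editorial note: the SplitCriticalEdge hunks are part of the broader mop_iterator to range-based-for cleanup running through this diff. A hedged helper showing the modern spelling of an operand walk; the helper itself is illustrative and not part of the patch.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Count the register operands of an instruction using MI.operands(),
// the range that the converted loops above iterate over.
static unsigned countRegOperands(const llvm::MachineInstr &MI) {
  unsigned NumRegs = 0;
  for (const llvm::MachineOperand &MO : MI.operands())
    if (MO.isReg())
      ++NumRegs;
  return NumRegs;
}
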
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f61142d202eb..8a1b4031642d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1185,7 +1185,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// The integrated tail duplication is really designed for increasing
// fallthrough from Succ's predecessors to its successors. We may need
// other mechanisms to handle different cases.
- if (Succ->succ_size() == 0)
+ if (Succ->succ_empty())
return true;
// Plus the already placed predecessor.
@@ -2050,6 +2050,8 @@ MachineBlockPlacement::findBestLoopTopHelper(
BlockChain &HeaderChain = *BlockToChain[OldTop];
if (!LoopBlockSet.count(*HeaderChain.begin()))
return OldTop;
+ if (OldTop != *HeaderChain.begin())
+ return OldTop;
LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
<< "\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index cb2e18e8c813..0fcb07252d0e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -514,41 +514,38 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
SmallVector<unsigned, 2> ImplicitDefsToUpdate;
SmallVector<unsigned, 2> ImplicitDefs;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
-
- if (!isCSECandidate(MI))
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isCSECandidate(&MI))
continue;
- bool FoundCSE = VNT.count(MI);
+ bool FoundCSE = VNT.count(&MI);
if (!FoundCSE) {
// Using trivial copy propagation to find more CSE opportunities.
- if (PerformTrivialCopyPropagation(MI, MBB)) {
+ if (PerformTrivialCopyPropagation(&MI, MBB)) {
Changed = true;
// After coalescing MI itself may become a copy.
- if (MI->isCopyLike())
+ if (MI.isCopyLike())
continue;
// Try again to see if CSE is possible.
- FoundCSE = VNT.count(MI);
+ FoundCSE = VNT.count(&MI);
}
}
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->isCommutable()) {
- if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
+ if (!FoundCSE && MI.isCommutable()) {
+ if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
Commuted = true;
FoundCSE = VNT.count(NewMI);
- if (NewMI != MI) {
+ if (NewMI != &MI) {
// New instruction. It doesn't need to be kept.
NewMI->eraseFromParent();
Changed = true;
} else if (!FoundCSE)
// MI was changed but it didn't help, commute it back!
- (void)TII->commuteInstruction(*MI);
+ (void)TII->commuteInstruction(MI);
}
}
@@ -559,8 +556,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
SmallSet<MCRegister, 8> PhysRefs;
PhysDefVector PhysDefs;
bool PhysUseDef = false;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
- PhysDefs, PhysUseDef)) {
+ if (FoundCSE &&
+ hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
FoundCSE = false;
// ... Unless the CS is local or is in the sole predecessor block
@@ -569,23 +566,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// This can never be the case if the instruction both uses and
// defines the same physical register, which was detected above.
if (!PhysUseDef) {
- unsigned CSVN = VNT.lookup(MI);
+ unsigned CSVN = VNT.lookup(&MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
FoundCSE = true;
}
}
if (!FoundCSE) {
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
continue;
}
// Found a common subexpression, eliminate it.
- unsigned CSVN = VNT.lookup(MI);
+ unsigned CSVN = VNT.lookup(&MI);
MachineInstr *CSMI = Exps[CSVN];
- LLVM_DEBUG(dbgs() << "Examining: " << *MI);
+ LLVM_DEBUG(dbgs() << "Examining: " << MI);
LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
// Prevent CSE-ing non-local convergent instructions.
@@ -597,20 +594,20 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// definition, so it's necessary to use `isConvergent` to prevent illegally
// CSE-ing the subset of `isConvergent` instructions which do fall into this
// extended definition.
- if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+ if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
"different BBs, avoid CSE!\n");
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
continue;
}
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
- for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
Register OldReg = MO.getReg();
@@ -635,7 +632,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
Register::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
- if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
@@ -674,7 +671,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
for (const auto &PhysDef : PhysDefs)
- if (!MI->getOperand(PhysDef.first).isDead())
+ if (!MI.getOperand(PhysDef.first).isDead())
CSMI->getOperand(PhysDef.first).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
@@ -687,8 +684,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// Since we eliminated MI, and reused a register imp-def'd by CSMI
// (here %nzcv), that register, if it was killed before MI, should have
// that kill flag removed, because its lifetime was extended.
- if (CSMI->getParent() == MI->getParent()) {
- for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+ if (CSMI->getParent() == MI.getParent()) {
+ for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
for (auto ImplicitDef : ImplicitDefs)
if (MachineOperand *MO = II->findRegisterUseOperand(
ImplicitDef, /*isKill=*/true, TRI))
@@ -711,7 +708,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
++NumCrossBBCSEs;
}
- MI->eraseFromParent();
+ MI.eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
++NumPhysCSEs;
@@ -719,8 +716,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
++NumCommutes;
Changed = true;
} else {
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
}
CSEPairs.clear();
ImplicitDefsToUpdate.clear();
@@ -807,19 +804,16 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
- if (!isPRECandidate(MI))
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isPRECandidate(&MI))
continue;
- if (!PREMap.count(MI)) {
- PREMap[MI] = MBB;
+ if (!PREMap.count(&MI)) {
+ PREMap[&MI] = MBB;
continue;
}
- auto MBB1 = PREMap[MI];
+ auto MBB1 = PREMap[&MI];
assert(
!DT->properlyDominates(MBB, MBB1) &&
"MBB cannot properly dominate MBB1 while DFS through dominators tree!");
@@ -844,17 +838,17 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
// it's necessary to use `isConvergent` to prevent illegally PRE-ing the
// subset of `isConvergent` instructions which do fall into this
// extended definition.
- if (MI->isConvergent() && CMBB != MBB)
+ if (MI.isConvergent() && CMBB != MBB)
continue;
- assert(MI->getOperand(0).isDef() &&
+ assert(MI.getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
- Register VReg = MI->getOperand(0).getReg();
+ Register VReg = MI.getOperand(0).getReg();
Register NewReg = MRI->cloneVirtualRegister(VReg);
- if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
continue;
MachineInstr &NewMI =
- TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
// When hoisting, make sure we don't carry the debug location of
// the original instruction, as that's not correct and can cause
@@ -864,7 +858,7 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
NewMI.getOperand(0).setReg(NewReg);
- PREMap[MI] = CMBB;
+ PREMap[&MI] = CMBB;
++NumPREs;
Changed = true;
}
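
Editorial note: ProcessBlockCSE and ProcessBlockPRE now iterate with llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs, so the current instruction can be erased (as MI.eraseFromParent() does above) without corrupting the loop. A minimal sketch with a standard list standing in for the block (illustrative):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <list>

int main() {
  std::list<int> Block = {1, 2, 3, 4};
  for (int &I : llvm::make_early_inc_range(Block)) {
    int V = I;            // copy the value; the element may be erased below
    if (V % 2 == 0)
      Block.remove(V);    // safe: the wrapped iterator already moved on
  }
  for (int I : Block)
    std::printf("%d\n", I);   // prints 1 3
  return 0;
}
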
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 10b74f5f47f5..7c83bacd80d9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
if (!UseI.isCopy())
return false;
+ const TargetRegisterClass *CopySrcRC =
+ TRI->getMinimalPhysRegClass(CopySrcReg);
+ const TargetRegisterClass *UseDstRC =
+ TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+ const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
+
+ // If the cross-copy register class is not the same as the copy source register
+ // class, then the register cannot be copied directly and a cross register class
+ // copy is required. Forwarding this copy without checking the register class of
+ // UseDst may create additional cross register copies when the copy instruction
+ // is expanded in later passes.
+ if (CopySrcRC != CrossCopyRC) {
+ const TargetRegisterClass *CopyDstRC =
+ TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
+
+ // Check if UseDstRC matches the necessary register class to copy from
+ // CopySrc's register class. If so then forwarding the copy will not
+ // introduce any cross-class copys. Else if CopyDstRC matches then keep the
+ // copy and do not forward. If neither UseDstRC or CopyDstRC matches then
+ // we may need a cross register copy later but we do not worry about it
+ // here.
+ if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
+ return false;
+ }
+
/// COPYs don't have register class constraints, so if the user instruction
/// is a COPY, we just try to avoid introducing additional cross-class
/// COPYs. For example:
@@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
///
/// so we have reduced the number of cross-class COPYs and potentially
/// introduced a nop COPY that can be removed.
- const TargetRegisterClass *UseDstRC =
- TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
-
const TargetRegisterClass *SuperRC = UseDstRC;
for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
SuperRC; SuperRC = *SuperRCI++)
@@ -554,6 +576,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
MOUse.setReg(CopySrcReg);
if (!CopySrc.isRenamable())
MOUse.setIsRenamable(false);
+ MOUse.setIsUndef(CopySrc.isUndef());
LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
@@ -571,19 +594,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
<< "\n");
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Analyze copies (which don't overlap themselves).
- if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg())) {
- assert(MI->getOperand(0).getReg().isPhysical() &&
- MI->getOperand(1).getReg().isPhysical() &&
+ if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg())) {
+ assert(MI.getOperand(0).getReg().isPhysical() &&
+ MI.getOperand(1).getReg().isPhysical() &&
"MachineCopyPropagation should be run after register allocation!");
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Def = MI.getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI.getOperand(1).getReg().asMCReg();
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -600,31 +620,31 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// %ecx = COPY %eax
// =>
// %ecx = COPY %eax
- if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
+ if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
continue;
- forwardUses(*MI);
+ forwardUses(MI);
// Src may have been changed by forwardUses()
- Src = MI->getOperand(1).getReg().asMCReg();
+ Src = MI.getOperand(1).getReg().asMCReg();
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- ReadRegister(Src, *MI, RegularUse);
- for (const MachineOperand &MO : MI->implicit_operands()) {
+ ReadRegister(Src, MI, RegularUse);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
- ReadRegister(Reg, *MI, RegularUse);
+ ReadRegister(Reg, MI, RegularUse);
}
- LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
// Copy is now a candidate for deletion.
if (!MRI->isReserved(Def))
- MaybeDeadCopies.insert(MI);
+ MaybeDeadCopies.insert(&MI);
// If 'Def' is previously source of another copy, then this earlier copy's
// source is no longer available. e.g.
@@ -634,7 +654,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// ...
// %xmm2 = copy %xmm9
Tracker.clobberRegister(Def, *TRI);
- for (const MachineOperand &MO : MI->implicit_operands()) {
+ for (const MachineOperand &MO : MI.implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
MCRegister Reg = MO.getReg().asMCReg();
@@ -643,29 +663,29 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clobberRegister(Reg, *TRI);
}
- Tracker.trackCopy(MI, *TRI);
+ Tracker.trackCopy(&MI, *TRI);
continue;
}
// Clobber any earlyclobber regs first.
- for (const MachineOperand &MO : MI->operands())
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isEarlyClobber()) {
MCRegister Reg = MO.getReg().asMCReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
if (MO.isTied())
- ReadRegister(Reg, *MI, RegularUse);
+ ReadRegister(Reg, MI, RegularUse);
Tracker.clobberRegister(Reg, *TRI);
}
- forwardUses(*MI);
+ forwardUses(MI);
// Not a copy.
SmallVector<Register, 2> Defs;
const MachineOperand *RegMask = nullptr;
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask())
RegMask = &MO;
if (!MO.isReg())
@@ -681,7 +701,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Defs.push_back(Reg.asMCReg());
continue;
} else if (MO.readsReg())
- ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse);
+ ReadRegister(Reg.asMCReg(), MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
index c8845d838282..28cff2a4f3f3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
@@ -73,7 +73,7 @@ void MachineDominatorTree::releaseMemory() {
void MachineDominatorTree::verifyAnalysis() const {
if (DT && VerifyMachineDomInfo)
- if (!DT->verify(DomTreeT::VerificationLevel::Basic)) {
+ if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {
errs() << "MachineDominatorTree verification failed\n";
abort();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 0a454b68aca3..366d06871245 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
case P::Selected: return "Selected";
case P::TracksLiveness: return "TracksLiveness";
case P::TiedOpsRewritten: return "TiedOpsRewritten";
+ case P::FailsVerification: return "FailsVerification";
}
llvm_unreachable("Invalid machine function property");
}
@@ -129,8 +130,8 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
const Function &F) {
- if (F.hasFnAttribute(Attribute::StackAlignment))
- return F.getFnStackAlignment();
+ if (auto MA = F.getFnStackAlign())
+ return MA->value();
return STI->getFrameLowering()->getStackAlign().value();
}
@@ -745,9 +746,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
// Add filters in a list.
auto *CVal = cast<Constant>(Val);
SmallVector<const GlobalValue *, 4> FilterList;
- for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
- II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+ for (const Use &U : CVal->operands())
+ FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts()));
addFilterTypeInfo(LandingPad, FilterList);
}
@@ -973,6 +973,9 @@ void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
unsigned Subreg) {
// Catch any accidental self-loops.
assert(A.first != B.first);
+ // Don't allow any substitutions _from_ the memory operand number.
+ assert(A.second != DebugOperandMemNumber);
+
DebugValueSubstitutions.push_back({A, B, Subreg});
}
@@ -1148,17 +1151,17 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
// locations.
;
} else {
- // Assert that this is the entry block. If it isn't, then there is some
- // code construct we don't recognise that deals with physregs across
- // blocks.
+ // Assert that this is the entry block, or an EH pad. If it isn't, then
+ // there is some code construct we don't recognise that deals with physregs
+ // across blocks.
assert(!State.first.isVirtual());
- assert(&*InsertBB.getParent()->begin() == &InsertBB);
+ assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());
}
// Create DBG_PHI for specified physreg.
auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
TII.get(TargetOpcode::DBG_PHI));
- Builder.addReg(State.first, RegState::Debug);
+ Builder.addReg(State.first);
unsigned NewNum = getNewDebugInstrNum();
Builder.addImm(NewNum);
return ApplySubregisters({NewNum, 0u});
@@ -1171,10 +1174,9 @@ void MachineFunction::finalizeDebugInstrRefs() {
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
MI.setDesc(RefII);
MI.getOperand(1).ChangeToRegister(0, false);
- MI.getOperand(0).setIsDebug();
};
- if (!getTarget().Options.ValueTrackingVariableLocations)
+ if (!useDebugInstrRef())
return;
for (auto &MBB : *this) {
@@ -1221,6 +1223,27 @@ void MachineFunction::finalizeDebugInstrRefs() {
}
}
+bool MachineFunction::useDebugInstrRef() const {
+ // Disable instr-ref at -O0: it's very slow (in compile time). We can still
+ // have optimized code inlined into this unoptimized code, however with
+ // fewer and less aggressive optimizations happening, coverage and accuracy
+ // should not suffer.
+ if (getTarget().getOptLevel() == CodeGenOpt::None)
+ return false;
+
+ // Don't use instr-ref if this function is marked optnone.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ if (getTarget().Options.ValueTrackingVariableLocations)
+ return true;
+
+ return false;
+}
+
+// Use one million as a high / reserved number.
+const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
+
/// \}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 0707945e7fb7..5c4f75e9ceb9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -294,6 +294,9 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
NewMO->setIsEarlyClobber(true);
}
+ // Ensure debug instructions set debug flag on register uses.
+ if (NewMO->isUse() && isDebugInstr())
+ NewMO->setIsDebug();
}
}
@@ -2111,11 +2114,11 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);
+ auto MIB = BuildMI(MF, DL, MCID).addReg(Reg);
if (IsIndirect)
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Variable).addMetadata(Expr);
}
@@ -2134,7 +2137,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
if (IsIndirect)
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Variable).addMetadata(Expr);
}
@@ -2153,7 +2156,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MIB.addMetadata(Variable).addMetadata(Expr);
for (const MachineOperand &MO : MOs)
if (MO.isReg())
- MIB.addReg(MO.getReg(), RegState::Debug);
+ MIB.addReg(MO.getReg());
else
MIB.add(MO);
return MIB;
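
Editorial note: because addOperand now sets the debug flag itself for register uses on debug instructions (first hunk of this file), BuildMI callers stop passing RegState::Debug. A hedged sketch of what a DBG_VALUE builder call looks like after this change; the helper and its parameters are illustrative, only the builder calls mirror the hunk.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

// Build a non-indirect DBG_VALUE describing Reg. No RegState::Debug needed:
// MachineInstr::addOperand() marks the register uses as debug uses.
static llvm::MachineInstrBuilder
buildDbgValue(llvm::MachineFunction &MF, const llvm::DebugLoc &DL,
              const llvm::MCInstrDesc &MCID, llvm::Register Reg,
              const llvm::MDNode *Variable, const llvm::MDNode *Expr) {
  return llvm::BuildMI(MF, DL, MCID)
      .addReg(Reg)
      .addReg(0U)                 // non-indirect form
      .addMetadata(Variable)
      .addMetadata(Expr);
}
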
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b7..500cf8e0b79b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+ bool isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const;
+
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though it is rematerializable, RA might not
+/// actually rematerialize it in this scenario. In that case we do not want to
+/// hoist such an instruction out of the loop in the belief that RA will sink it
+/// back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const {
+ if (!TII->isTriviallyReMaterializable(MI, AA))
+ return false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ return false;
+ }
+
+ return true;
+}
+
void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
@@ -761,15 +781,11 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Process the block
SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = MBB->begin(), E = MBB->end(); MII != E; ) {
- MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr *MI = &*MII;
- if (!Hoist(MI, Preheader))
- UpdateRegPressure(MI);
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!Hoist(&MI, Preheader))
+ UpdateRegPressure(&MI);
// If we have hoisted an instruction that may store, it can only be a
// constant store.
- MII = NextMII;
}
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
@@ -1156,9 +1172,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
return false;
}
- // Rematerializable instructions should always be hoisted since the register
- // allocator can just pull them down again when needed.
- if (TII->isTriviallyReMaterializable(MI, AA))
+ // Rematerializable instructions should always be hoisted providing the
+ // register allocator can just pull them down again when needed.
+ if (isTriviallyReMaterializable(MI, AA))
return true;
// FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1227,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!TII->isTriviallyReMaterializable(MI, AA) &&
+ if (!isTriviallyReMaterializable(MI, AA) &&
!MI.isDereferenceableInvariantLoad(AA)) {
LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
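
The MachineLICM.cpp change above stops treating every trivially rematerializable instruction as free to hoist: if the instruction reads virtual registers, the register allocator may be unable to rematerialize it next to its uses and will not sink it back, so hoisting only raises register pressure. A rough standalone model of that gate, with stand-in operand and instruction types rather than LLVM's:

  #include <iostream>
  #include <vector>

  struct Op {
    bool IsReg = false;
    bool IsUse = false;
    bool IsVirtual = false;
  };

  struct Instr {
    bool TriviallyRemat = false;  // what TII->isTriviallyReMaterializable says
    std::vector<Op> Ops;
  };

  // Treat the instruction as "free to hoist" only if it is rematerializable
  // *and* has no virtual register uses that could pin it after hoisting.
  static bool isHoistableRemat(const Instr &MI) {
    if (!MI.TriviallyRemat)
      return false;
    for (const Op &O : MI.Ops)
      if (O.IsReg && O.IsUse && O.IsVirtual)
        return false;
    return true;
  }

  int main() {
    Instr ConstMat{true, {}};                   // e.g. materialize a constant
    Instr AddVReg{true, {{true, true, true}}};  // remat, but reads a vreg
    std::cout << isHoistableRemat(ConstMat) << ' '
              << isHoistableRemat(AddVReg) << '\n';  // prints "1 0"
  }
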
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 8f91a5b698d0..9b96bc5e5e7f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
@@ -154,7 +155,9 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
// The instruction is loop invariant if all of its operands are.
for (const MachineOperand &MO : I.operands()) {
@@ -174,7 +177,8 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
// However, if the physreg is known to always be caller saved/restored
// then this use is safe to hoist.
if (!MRI->isConstantPhysReg(Reg) &&
- !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
return false;
// Otherwise it's safe to move.
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index b8ba0453d24c..4d080e1a4f82 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -250,6 +250,11 @@ void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
if (RegInfo && WasReg)
RegInfo->removeRegOperandFromUseList(this);
+ // Ensure debug instructions set debug flag on register uses.
+ const MachineInstr *MI = getParent();
+ if (!isDef && MI && MI->isDebugInstr())
+ isDebug = true;
+
// Change this to a register and set the reg#.
assert(!(isDead && !isDef) && "Dead flag on non-def");
assert(!(isKill && isDef) && "Kill flag on def");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index 1d55bd00e033..cfbccebaff3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -798,6 +798,7 @@ bool MachineOutliner::outline(Module &M,
Last = std::next(CallInst.getReverse());
Iter != Last; Iter++) {
MachineInstr *MI = &*Iter;
+ SmallSet<Register, 2> InstrUseRegs;
for (MachineOperand &MOP : MI->operands()) {
// Skip over anything that isn't a register.
if (!MOP.isReg())
@@ -806,7 +807,8 @@ bool MachineOutliner::outline(Module &M,
if (MOP.isDef()) {
// Introduce DefRegs set to skip the redundant register.
DefRegs.insert(MOP.getReg());
- if (!MOP.isDead() && UseRegs.count(MOP.getReg()))
+ if (UseRegs.count(MOP.getReg()) &&
+ !InstrUseRegs.count(MOP.getReg()))
            // Since the register is modeled as defined,
// it is not necessary to be put in use register set.
UseRegs.erase(MOP.getReg());
@@ -814,6 +816,7 @@ bool MachineOutliner::outline(Module &M,
// Any register which is not undefined should
// be put in the use register set.
UseRegs.insert(MOP.getReg());
+ InstrUseRegs.insert(MOP.getReg());
}
}
if (MI->isCandidateForCallSiteEntry())
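
The MachineOutliner.cpp change above adds a per-instruction InstrUseRegs set so that, in the backwards scan, a def only removes a register from UseRegs when the use was recorded by a later instruction; a register both read and written by the same instruction (say r1 = r1 + 1) therefore stays live into the outlined body, and the old !MOP.isDead() condition becomes unnecessary. A small sketch of that scan with plain standard containers instead of SmallSet and Register:

  #include <set>
  #include <vector>

  struct Operand { int Reg; bool IsDef; };
  using Instr = std::vector<Operand>;

  // Walk the sequence backwards, keeping the set of registers that are still
  // live (used before being defined). A def only kills a use made by a later
  // instruction, never a use made by the same instruction.
  static std::set<int> liveBeforeSequence(const std::vector<Instr> &Seq) {
    std::set<int> UseRegs;
    for (auto It = Seq.rbegin(); It != Seq.rend(); ++It) {
      std::set<int> InstrUseRegs;
      for (const Operand &Op : *It) {
        if (Op.IsDef) {
          if (UseRegs.count(Op.Reg) && !InstrUseRegs.count(Op.Reg))
            UseRegs.erase(Op.Reg);
        } else {
          UseRegs.insert(Op.Reg);
          InstrUseRegs.insert(Op.Reg);
        }
      }
    }
    return UseRegs;
  }

  int main() {
    // r1 = r1 + 1: r1 is both used and defined, so it must stay live-in.
    std::vector<Instr> Seq = {{{1, /*IsDef=*/false}, {1, /*IsDef=*/true}}};
    return liveBeforeSequence(Seq).count(1) ? 0 : 1;
  }
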
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index caa3f8049aeb..e18318386def 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -200,8 +200,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
if (!EnableSWP)
return false;
- if (mf.getFunction().getAttributes().hasAttribute(
- AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
+ if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -386,7 +385,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
MachineRegisterInfo &MRI = MF->getRegInfo();
SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
- for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+ for (MachineInstr &PI : B.phis()) {
MachineOperand &DefOp = PI.getOperand(0);
assert(DefOp.getSubReg() == 0);
auto *RC = MRI.getRegClass(DefOp.getReg());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 3f6b11e072b4..19bf87d3e290 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,9 +383,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
// TODO: This could be more efficient by bulk changing the operands.
- for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {
if (Register::isPhysicalRegister(ToReg)) {
O.substPhysReg(ToReg, *TRI);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index 4f42a2c8aeff..47d40f0823c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -583,7 +583,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
<< " " << MBB->getName() << "\n From: " << *I
<< " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
+ else dbgs() << "End\n";
dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index ec98394dca79..30745c7a5583 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -131,7 +131,7 @@ namespace {
// will be split.
SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
- SparseBitVector<> RegsToClearKillFlags;
+ DenseSet<Register> RegsToClearKillFlags;
using AllSuccsCache =
std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
@@ -476,14 +476,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;
- for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+ for (MachineInstr *I : llvm::reverse(Candidates)) {
if (i++ == SinkIntoLoopLimit) {
LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
"be analysed.");
break;
}
- MachineInstr *I = *It;
if (!SinkIntoLoop(L, *I))
break;
EverMadeChange = true;
@@ -683,13 +682,9 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
// There is no need to do this check if all the uses are PHI nodes. PHI
// sources are only defined on the specific predecessor edges.
if (!BreakPHIEdge) {
- for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
- E = ToBB->pred_end(); PI != E; ++PI) {
- if (*PI == FromBB)
- continue;
- if (!DT->dominates(ToBB, *PI))
+ for (MachineBasicBlock *Pred : ToBB->predecessors())
+ if (Pred != FromBB && !DT->dominates(ToBB, Pred))
return false;
- }
}
ToSplit.insert(std::make_pair(FromBB, ToBB));
@@ -1329,7 +1324,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
+ if (!MO.isReg() || MO.isUse())
+ continue;
Register Reg = MO.getReg();
if (Reg == 0 || !Register::isPhysicalRegister(Reg))
continue;
@@ -1439,7 +1435,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// used registers.
for (MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse())
- RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
+ RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
}
return true;
@@ -1718,10 +1714,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
UsedRegUnits.clear();
SeenDbgInstrs.clear();
- for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
// Track the operand index for use in Copy.
SmallVector<unsigned, 2> UsedOpsInCopy;
// Track the register number defed in Copy.
@@ -1729,14 +1722,14 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// We must sink this DBG_VALUE if its operand is sunk. To avoid searching
// for DBG_VALUEs later, record them when they're encountered.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
bool IsValid = true;
- for (MachineOperand &MO : MI->debug_operands()) {
+ for (MachineOperand &MO : MI.debug_operands()) {
if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
IsValid = false;
break;
@@ -1750,28 +1743,28 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
if (IsValid) {
for (auto RegOps : MIUnits)
- SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second});
+ SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
}
continue;
}
- if (MI->isDebugOrPseudoInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
// Do not move any instruction across function call.
- if (MI->isCall())
+ if (MI.isCall())
return false;
- if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
// Don't sink the COPY if it would violate a register dependency.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
@@ -1782,7 +1775,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Don't sink if we cannot find a single sinkable successor in which Reg
// is live-in.
if (!SuccBB) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
@@ -1793,7 +1786,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
- for (auto &MO : MI->operands()) {
+ for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1811,10 +1804,10 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
- clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
+ clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
- updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
+ performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
+ updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
++NumPostRACopySink;
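
Several hunks in this commit (MachineSink, MachineStripDebug, MachineRegisterInfo, ModuloSchedule, PreISelIntrinsicLowering) replace hand-rolled "save the next iterator before touching the current element" loops with llvm::make_early_inc_range, whose iterator advances past the element before handing it to the loop body, so the body may erase or move that element safely. A rough standalone re-implementation of the idea over a std::list, not LLVM's actual llvm/ADT/STLExtras.h adaptor; it needs C++17 for range-for with differing begin/end types:

  #include <iostream>
  #include <list>

  // Minimal early-increment range: operator* steps the wrapped iterator past
  // the element before returning it, so erasing that element does not
  // invalidate the traversal.
  template <typename Iter> class EarlyIncRange {
    Iter Cur, End;

  public:
    EarlyIncRange(Iter B, Iter E) : Cur(B), End(E) {}
    EarlyIncRange &begin() { return *this; }
    Iter end() { return End; }
    bool operator!=(Iter RHS) const { return Cur != RHS; }
    void operator++() {}                 // the advance already happened in *
    auto &operator*() { return *Cur++; }
  };

  int main() {
    std::list<int> L = {1, 2, 3, 4, 5};
    EarlyIncRange Range(L.begin(), L.end());
    for (int &V : Range) {
      int Val = V;                       // copy before mutating the list
      if (Val % 2 == 0)
        L.remove(Val);                   // erase while iterating: safe here
    }
    for (int V : L)
      std::cout << V << ' ';             // prints "1 3 5"
    std::cout << '\n';
  }
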
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 584d43b42004..28712d1a816b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -82,7 +82,7 @@ bool isFunctionColdInCallGraph(
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCount(FunctionCount.getCount()))
+ if (!PSI->isColdCount(FunctionCount->getCount()))
return false;
for (const auto &MBB : *MF)
if (!isColdBlock(&MBB, PSI, &MBFI))
@@ -99,7 +99,7 @@ bool isFunctionHotInCallGraphNthPercentile(
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
if (PSI->isHotCountNthPercentile(PercentileCutoff,
- FunctionCount.getCount()))
+ FunctionCount->getCount()))
return true;
for (const auto &MBB : *MF)
if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
@@ -112,7 +112,7 @@ bool isFunctionColdInCallGraphNthPercentile(
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
if (!PSI->isColdCountNthPercentile(PercentileCutoff,
- FunctionCount.getCount()))
+ FunctionCount->getCount()))
return false;
for (const auto &MBB : *MF)
if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
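
The MachineSizeOpts.cpp hunks reflect Function::getEntryCount now returning an optional value, so the call sites switch from FunctionCount.getCount() to FunctionCount->getCount(). A tiny illustration of the same API shape using std::optional and a stand-in ProfileCount type (not LLVM's):

  #include <cstdint>
  #include <iostream>
  #include <optional>

  struct ProfileCount {
    uint64_t Count;
    uint64_t getCount() const { return Count; }
  };

  // Missing profile data is an empty optional rather than a sentinel count.
  static std::optional<ProfileCount> getEntryCount(bool HasProfile) {
    if (!HasProfile)
      return std::nullopt;
    return ProfileCount{1000};
  }

  int main() {
    if (auto FunctionCount = getEntryCount(true))
      std::cout << "entry count: " << FunctionCount->getCount() << '\n';
    if (auto FunctionCount = getEntryCount(false))
      std::cout << "never printed\n";
  }
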
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
index a1cb12f91275..86cf4999d4b0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -50,29 +50,26 @@ struct StripDebugMachineModule : public ModulePass {
continue;
MachineFunction &MF = *MaybeMF;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E;) {
- if (I->isDebugInstr()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.isDebugInstr()) {
// FIXME: We should remove all of them. However, AArch64 emits an
// invalid `DBG_VALUE $lr` with only one operand instead of
          // the usual three and has a test that depends on its
// preservation. Preserve it for now.
- if (I->getNumOperands() > 1) {
- LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I);
- I = MBB.erase(I);
+ if (MI.getNumOperands() > 1) {
+ LLVM_DEBUG(dbgs() << "Removing debug instruction " << MI);
+ MBB.erase(&MI);
Changed |= true;
continue;
}
}
- if (I->getDebugLoc()) {
- LLVM_DEBUG(dbgs() << "Removing location " << *I);
- I->setDebugLoc(DebugLoc());
+ if (MI.getDebugLoc()) {
+ LLVM_DEBUG(dbgs() << "Removing location " << MI);
+ MI.setDebugLoc(DebugLoc());
Changed |= true;
- ++I;
continue;
}
- LLVM_DEBUG(dbgs() << "Keeping " << *I);
- ++I;
+ LLVM_DEBUG(dbgs() << "Keeping " << MI);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 7e3198af02cd..d6bb3e7c9e58 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -210,6 +210,11 @@ namespace {
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
+ /// Verify that all of \p MI's virtual register operands are scalars.
+ /// \returns True if all virtual register operands are scalar. False
+ /// otherwise.
+ bool verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
@@ -287,6 +292,13 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ // Skip functions that have known verification problems.
+ // FIXME: Remove this mechanism when all problematic passes have been
+ // fixed.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification))
+ return false;
+
unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
if (FoundErrors)
report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
@@ -849,6 +861,21 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
}
}
+bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (none_of(MI.explicit_operands(), [&MRI](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ const auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return false;
+ return !MRI.getType(Reg).isScalar();
+ }))
+ return true;
+ report("All register operands must have scalar types", &MI);
+ return false;
+}
+
/// Check that types are consistent when two operands need to have the same
/// number of vector elements.
/// \return true if the types are valid.
@@ -1392,7 +1419,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
AttributeList Attrs
= Intrinsic::getAttributes(MF->getFunction().getContext(),
static_cast<Intrinsic::ID>(IntrID));
- bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+ bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone);
if (NoSideEffects && DeclHasSideEffects) {
report("G_INTRINSIC used with intrinsic that accesses memory", MI);
break;
@@ -1570,11 +1597,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
case TargetOpcode::G_VECREDUCE_UMAX:
case TargetOpcode::G_VECREDUCE_UMIN: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
- LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (!DstTy.isScalar())
report("Vector reduction requires a scalar destination type", MI);
- if (!SrcTy.isVector())
- report("Vector reduction requires vector source=", MI);
break;
}
@@ -1598,7 +1622,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
-
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_LROUND: {
+ verifyAllRegOpsScalar(*MI, *MRI);
+ break;
+ }
default:
break;
}
@@ -1632,6 +1660,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("Unspillable Terminator does not define a reg", MI);
Register Def = MI->getOperand(0).getReg();
if (Def.isVirtual() &&
+ !MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs) &&
std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
report("Unspillable Terminator expected to have at most one use!", MI);
}
@@ -1866,6 +1896,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
switch (MO->getType()) {
case MachineOperand::MO_Register: {
+ // Verify debug flag on debug instructions. Check this first because reg0
+ // indicates an undefined debug value.
+ if (MI->isDebugInstr() && MO->isUse()) {
+ if (!MO->isDebug())
+ report("Register operand must be marked debug", MO, MONum);
+ } else if (MO->isDebug()) {
+ report("Register operand must not be marked debug", MO, MONum);
+ }
+
const Register Reg = MO->getReg();
if (!Reg)
return;
@@ -1932,10 +1971,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
}
- if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) {
- report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum);
- return;
- }
} else {
// Virtual register.
const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
@@ -2182,14 +2217,30 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const Register Reg = MO->getReg();
+ const unsigned SubRegIdx = MO->getSubReg();
+
+ const LiveInterval *LI = nullptr;
+ if (LiveInts && Reg.isVirtual()) {
+ if (LiveInts->hasInterval(Reg)) {
+ LI = &LiveInts->getInterval(Reg);
+ if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ report("Live interval for subreg operand has no subranges", MO, MONum);
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
// Both use and def operands can read a register.
if (MO->readsReg()) {
if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
- // Check that LiveVars knows this kill.
- if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) {
+ // Check that LiveVars knows this kill (unless we are inside a bundle, in
+ // which case we have already checked that LiveVars knows any kills on the
+ // bundle header instead).
+ if (LiveVars && Reg.isVirtual() && MO->isKill() &&
+ !MI->isBundledWithPred()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
@@ -2209,42 +2260,36 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
- if (Register::isVirtualRegister(Reg)) {
- if (LiveInts->hasInterval(Reg)) {
- // This is a virtual register interval.
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
-
- if (LI.hasSubRanges() && !MO->isDef()) {
- unsigned SubRegIdx = MO->getSubReg();
- LaneBitmask MOMask = SubRegIdx != 0
- ? TRI->getSubRegIndexLaneMask(SubRegIdx)
- : MRI->getMaxLaneMaskForVReg(Reg);
- LaneBitmask LiveInMask;
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((MOMask & SR.LaneMask).none())
- continue;
- checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
- LiveQueryResult LRQ = SR.Query(UseIdx);
- if (LRQ.valueIn())
- LiveInMask |= SR.LaneMask;
- }
- // At least parts of the register has to be live at the use.
- if ((LiveInMask & MOMask).none()) {
- report("No live subrange at use", MO, MONum);
- report_context(LI);
- report_context(UseIdx);
- }
+ if (Reg.isVirtual()) {
+ // This is a virtual register interval.
+ checkLivenessAtUse(MO, MONum, UseIdx, *LI, Reg);
+
+ if (LI->hasSubRanges() && !MO->isDef()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask;
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((MOMask & SR.LaneMask).none())
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+      // At least parts of the register have to be live at the use.
+ if ((LiveInMask & MOMask).none()) {
+ report("No live subrange at use", MO, MONum);
+ report_context(*LI);
+ report_context(UseIdx);
}
- } else {
- report("Virtual register has no live interval", MO, MONum);
}
}
}
// Use of a dead register.
if (!regsLive.count(Reg)) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// Reserved registers may be used even when 'dead'.
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
@@ -2266,7 +2311,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!Register::isPhysicalRegister(MOP.getReg()))
+ if (!MOP.getReg().isPhysical())
continue;
if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg))
@@ -2299,7 +2344,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDefined, Reg);
// Verify SSA form.
- if (MRI->isSSA() && Register::isVirtualRegister(Reg) &&
+ if (MRI->isSSA() && Reg.isVirtual() &&
std::next(MRI->def_begin(Reg)) != MRI->def_end())
report("Multiple virtual register defs in SSA form", MO, MONum);
@@ -2308,24 +2353,18 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (Register::isVirtualRegister(Reg)) {
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
-
- if (LI.hasSubRanges()) {
- unsigned SubRegIdx = MO->getSubReg();
- LaneBitmask MOMask = SubRegIdx != 0
- ? TRI->getSubRegIndexLaneMask(SubRegIdx)
- : MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & MOMask).none())
- continue;
- checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
- }
+ if (Reg.isVirtual()) {
+ checkLivenessAtDef(MO, MONum, DefIdx, *LI, Reg);
+
+ if (LI->hasSubRanges()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((SR.LaneMask & MOMask).none())
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
}
@@ -2918,9 +2957,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
}
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (S.end.isEarlyClobber()) {
+ // After tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if it is being redefined by an early-clobber def.
+ // TODO: Before tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if the last use is tied to an early-clobber def.
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
"redefined by an EC def in the same instruction", EndMBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
index d2ee21c8720f..b0760322064c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
@@ -44,15 +44,15 @@ static SUnit *getPredClusterSU(const SUnit &SU) {
return nullptr;
}
-static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
unsigned Num = 1;
const SUnit *CurrentSU = &SU;
while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;
return Num < FuseLimit;
}
-static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
- SUnit &SecondSU) {
+bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
// between them.
for (SDep &SI : FirstSU.Succs)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index b5517c40a28a..8b3cdfab4d42 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -81,10 +81,7 @@ void ModuloScheduleExpander::expand() {
Register Reg = Op.getReg();
unsigned MaxDiff = 0;
bool PhiIsSwapped = false;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- MachineOperand &UseOp = *UI;
+ for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
MachineInstr *UseMI = UseOp.getParent();
int UseStage = Schedule.getStage(UseMI);
unsigned Diff = 0;
@@ -141,13 +138,11 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Copy any terminator instructions to the new kernel, and update
// names as needed.
- for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
- E = BB->instr_end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ for (MachineInstr &MI : BB->terminators()) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
- InstrMap[NewMI] = &*I;
+ InstrMap[NewMI] = &MI;
}
NewKernel = KernelBB;
@@ -334,14 +329,10 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
- E = MRI.use_end();
- I != E;) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
- }
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
}
@@ -350,10 +341,8 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
/// specified loop.
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
MachineRegisterInfo &MRI) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
- E = MRI.use_end();
- I != E; ++I)
- if (I->getParent()->getParent() != BB)
+ for (const MachineOperand &MO : MRI.use_operands(Reg))
+ if (MO.getParent()->getParent() != BB)
return true;
return false;
}
@@ -702,11 +691,9 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs) {
// For each epilog block, check that the value defined by each instruction
// is used. If not, delete it.
- for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
- MBE = EpilogBBs.rend();
- MBB != MBE; ++MBB)
- for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
- ME = (*MBB)->instr_rend();
+ for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
+ for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
+ ME = MBB->instr_rend();
MI != ME;) {
        // From DeadMachineInstructionElim. Don't delete inline assembly.
if (MI->isInlineAsm()) {
@@ -721,26 +708,22 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
continue;
}
bool used = true;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- Register reg = MOI->getReg();
+ Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
if (Register::isPhysicalRegister(reg)) {
- used = !MOI->isDead();
+ used = !MO.isDead();
if (used)
break;
continue;
}
unsigned realUses = 0;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
+ for (const MachineOperand &U : MRI.use_operands(reg)) {
// Check if there are any uses that occur only in the original
// loop. If so, that's not a real use.
- if (UI->getParent()->getParent() != BB) {
+ if (U.getParent()->getParent() != BB) {
realUses++;
used = true;
break;
@@ -759,15 +742,11 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
}
// In the kernel block, check if we can remove a Phi that generates a value
// used in an instruction removed in the epilog block.
- for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
- BBE = KernelBB->getFirstNonPHI();
- BBI != BBE;) {
- MachineInstr *MI = &*BBI;
- ++BBI;
- Register reg = MI->getOperand(0).getReg();
+ for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
+ Register reg = MI.getOperand(0).getReg();
if (MRI.use_begin(reg) == MRI.use_end()) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
}
}
}
@@ -1145,12 +1124,9 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
int StagePhi = Schedule.getStage(Phi) + PhiNum;
// Rewrite uses that have been scheduled already to use the new
// Phi register.
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
- EI = MRI.use_end();
- UI != EI;) {
- MachineOperand &UseOp = *UI;
+ for (MachineOperand &UseOp :
+ llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
MachineInstr *UseMI = UseOp.getParent();
- ++UI;
if (UseMI->getParent() != BB)
continue;
if (UseMI->isPHI()) {
@@ -1223,8 +1199,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
bool Changed = true;
while (Changed) {
Changed = false;
- for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
assert(MI.isPHI());
if (MRI.use_empty(MI.getOperand(0).getReg())) {
if (LIS)
@@ -1624,32 +1599,32 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
auto InsertPt = DestBB->getFirstNonPHI();
DenseMap<Register, Register> Remaps;
- for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
- MachineInstr *MI = &*I++;
- if (MI->isPHI()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(
+ llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
+ if (MI.isPHI()) {
// This is an illegal PHI. If we move any instructions using an illegal
// PHI, we need to create a legal Phi.
- if (getStage(MI) != Stage) {
+ if (getStage(&MI) != Stage) {
// The legal Phi is not necessary if the illegal phi's stage
// is being moved.
- Register PhiR = MI->getOperand(0).getReg();
+ Register PhiR = MI.getOperand(0).getReg();
auto RC = MRI.getRegClass(PhiR);
Register NR = MRI.createVirtualRegister(RC);
MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
DebugLoc(), TII->get(TargetOpcode::PHI), NR)
.addReg(PhiR)
.addMBB(SourceBB);
- BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
- CanonicalMIs[NI] = CanonicalMIs[MI];
+ BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[&MI];
Remaps[PhiR] = NR;
}
}
- if (getStage(MI) != Stage)
+ if (getStage(&MI) != Stage)
continue;
- MI->removeFromParent();
- DestBB->insert(InsertPt, MI);
- auto *KernelMI = CanonicalMIs[MI];
- BlockMIs[{DestBB, KernelMI}] = MI;
+ MI.removeFromParent();
+ DestBB->insert(InsertPt, &MI);
+ auto *KernelMI = CanonicalMIs[&MI];
+ BlockMIs[{DestBB, KernelMI}] = &MI;
BlockMIs.erase({SourceBB, KernelMI});
}
SmallVector<MachineInstr *, 4> PhiToDelete;
@@ -1768,8 +1743,8 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
    // Keep track of which iteration each phi belongs to. We need it to know
// what version of the variable to use during prologue/epilogue stitching.
EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
- for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
- PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I;
+ for (MachineInstr &Phi : B->phis())
+ PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
}
for (size_t I = 0; I < Epilogs.size(); I++) {
LS.reset();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 54805584dbc1..77a6c37e1362 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -107,6 +107,7 @@ namespace {
using BBVRegPair = std::pair<unsigned, Register>;
using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
+ // Count the number of non-undef PHI uses of each register in each BB.
VRegPHIUse VRegPHIUseCount;
// Defs of PHI sources which are implicit_def.
@@ -426,9 +427,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
// Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
- --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
- MPhi->getOperand(i).getReg())];
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ if (!MPhi->getOperand(i).isUndef()) {
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+ }
+ }
// Now loop over all of the incoming arguments, changing them to copy into the
// IncomingReg register in the corresponding predecessor basic block.
@@ -461,6 +466,15 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
assert(MRI->use_empty(SrcReg) &&
"Expected a single use from UnspillableTerminator");
SrcRegDef->getOperand(0).setReg(IncomingReg);
+
+ // Update LiveVariables.
+ if (LV) {
+ LiveVariables::VarInfo &SrcVI = LV->getVarInfo(SrcReg);
+ LiveVariables::VarInfo &IncomingVI = LV->getVarInfo(IncomingReg);
+ IncomingVI.AliveBlocks = std::move(SrcVI.AliveBlocks);
+ SrcVI.AliveBlocks.clear();
+ }
+
continue;
}
@@ -515,9 +529,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// case, we should mark the last such terminator as being the killing
// block, not the copy.
MachineBasicBlock::iterator KillInst = opBlock.end();
- MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
- for (MachineBasicBlock::iterator Term = FirstTerm;
- Term != opBlock.end(); ++Term) {
+ for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end();
+ ++Term) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
}
@@ -527,7 +540,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't insert a copy this time.
- KillInst = FirstTerm;
+ KillInst = InsertPos;
while (KillInst != opBlock.begin()) {
--KillInst;
if (KillInst->isDebugInstr())
@@ -574,9 +587,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (!isLiveOut) {
MachineBasicBlock::iterator KillInst = opBlock.end();
- MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
- for (MachineBasicBlock::iterator Term = FirstTerm;
- Term != opBlock.end(); ++Term) {
+ for (MachineBasicBlock::iterator Term = InsertPos;
+ Term != opBlock.end(); ++Term) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
}
@@ -586,7 +598,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't just insert a copy.
- KillInst = FirstTerm;
+ KillInst = InsertPos;
while (KillInst != opBlock.begin()) {
--KillInst;
if (KillInst->isDebugInstr())
@@ -623,14 +635,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
- for (const auto &MBB : MF)
+ for (const auto &MBB : MF) {
for (const auto &BBI : MBB) {
if (!BBI.isPHI())
break;
- for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
- ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(),
- BBI.getOperand(i).getReg())];
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
+ if (!BBI.getOperand(i).isUndef()) {
+ ++VRegPHIUseCount[BBVRegPair(
+ BBI.getOperand(i + 1).getMBB()->getNumber(),
+ BBI.getOperand(i).getReg())];
+ }
+ }
}
+ }
}
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
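
The PHIElimination.cpp hunks above keep the increment and decrement of VRegPHIUseCount symmetric by skipping undef incoming operands both in analyzePHINodes and when the PHI is lowered; counting on only one side would let the per-(block, vreg) counts drift. A small sketch of that symmetry with a plain std::map keyed by a (predecessor block, vreg) pair:

  #include <cassert>
  #include <map>
  #include <utility>
  #include <vector>

  struct PhiIncoming { int Reg; int PredBB; bool IsUndef; };
  using BBVRegPair = std::pair<int, int>;   // (predecessor block, vreg)

  // The add and remove paths must filter undef operands identically,
  // otherwise the use counts no longer return to zero.
  static void addPhi(std::map<BBVRegPair, unsigned> &Count,
                     const std::vector<PhiIncoming> &Phi) {
    for (const PhiIncoming &In : Phi)
      if (!In.IsUndef)
        ++Count[{In.PredBB, In.Reg}];
  }

  static void removePhi(std::map<BBVRegPair, unsigned> &Count,
                        const std::vector<PhiIncoming> &Phi) {
    for (const PhiIncoming &In : Phi)
      if (!In.IsUndef)
        --Count[{In.PredBB, In.Reg}];
  }

  int main() {
    std::map<BBVRegPair, unsigned> Count;
    std::vector<PhiIncoming> Phi = {{/*Reg=*/7, /*PredBB=*/1, /*IsUndef=*/false},
                                    {/*Reg=*/7, /*PredBB=*/2, /*IsUndef=*/true}};
    addPhi(Count, Phi);
    removePhi(Count, Phi);
    assert(Count[{1, 7}] == 0 && Count[{2, 7}] == 0);
  }
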
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 49bdba518322..f9b16d2630d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -626,7 +626,7 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
Register SrcReg, SrcReg2;
- int CmpMask, CmpValue;
+ int64_t CmpMask, CmpValue;
if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
SrcReg.isPhysical() || SrcReg2.isPhysical())
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 80c38f3ec341..e3eb3f825851 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,9 +37,8 @@ static bool lowerLoadRelative(Function &F) {
Type *Int32PtrTy = Int32Ty->getPointerTo();
Type *Int8Ty = Type::getInt8Ty(F.getContext());
- for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto CI = dyn_cast<CallInst>(U.getUser());
if (!CI || CI->getCalledOperand() != &F)
continue;
@@ -90,10 +90,22 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
- for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto *CI = cast<CallInst>(I->getUser());
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto *CB = cast<CallBase>(U.getUser());
+
+ if (CB->getCalledFunction() != &F) {
+ objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
+ (void)Kind;
+ assert((Kind == objcarc::ARCInstKind::RetainRV ||
+ Kind == objcarc::ARCInstKind::ClaimRV) &&
+ "use expected to be the argument of operand bundle "
+ "\"clang.arc.attachedcall\"");
+ U.set(FCache.getCallee());
+ continue;
+ }
+
+ auto *CI = cast<CallInst>(CB);
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
- ++I;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(CI->args());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 2f65a450fb02..9a4f70a6070f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -285,7 +285,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
(void)Failed;
}
if (StackSize > Threshold) {
- DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold);
+ DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
F.getContext().diagnose(DiagStackSize);
}
ORE->emit([&]() {
@@ -395,12 +395,28 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
+ BitVector CSMask(SavedRegs.size());
+
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CSMask.set(CSRegs[i]);
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (SavedRegs.test(Reg))
- CSI.push_back(CalleeSavedInfo(Reg));
+ if (SavedRegs.test(Reg)) {
+ bool SavedSuper = false;
+ for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) {
+ // Some backends set all aliases for some registers as saved, such as
+ // Mips's $fp, so they appear in SavedRegs but not CSRegs.
+ if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) {
+ SavedSuper = true;
+ break;
+ }
+ }
+
+ if (!SavedSuper)
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
}
const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
@@ -1237,7 +1253,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
StackOffset Offset =
TFI->getFrameIndexReference(MF, FrameIdx, Reg);
Op.ChangeToRegister(Reg, false /*isDef*/);
- Op.setIsDebug();
const DIExpression *DIExpr = MI.getDebugExpression();
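
In assignCalleeSavedSpillSlots above, a BitVector mask of the target's callee-saved list is built first, and a register is then skipped when one of its super-registers is both marked saved and present in that list, so aliases that some backends mark saved wholesale do not get their own spill slot on top of the covering register's. A sketch of the same filtering with ordinary sets and a hypothetical W0/X0 sub-/super-register pair:

  #include <iostream>
  #include <set>
  #include <vector>

  // Hypothetical numbering: W0 is a sub-register of X0.
  enum Reg { W0 = 1, X0 = 2, LR = 3 };

  static std::vector<Reg> superRegs(Reg R) {
    return R == W0 ? std::vector<Reg>{X0} : std::vector<Reg>{};
  }

  int main() {
    std::set<Reg> SavedRegs = {W0, X0, LR};   // what the target marked saved
    std::set<Reg> CSRegs = {W0, X0, LR};      // the callee-saved register list

    // Skip a register whose super-register is already being saved as a
    // callee-saved register; the wider spill slot covers it.
    std::vector<Reg> CSI;
    for (Reg R : CSRegs) {
      if (!SavedRegs.count(R))
        continue;
      bool SavedSuper = false;
      for (Reg S : superRegs(R))
        if (SavedRegs.count(S) && CSRegs.count(S))
          SavedSuper = true;
      if (!SavedSuper)
        CSI.push_back(R);
    }
    for (Reg R : CSI)
      std::cout << R << ' ';                  // prints "2 3": X0 and LR only
    std::cout << '\n';
  }
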
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index a9fb577d5735..5f69f9194125 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -44,7 +44,14 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+ bool doInitialization(Module &M) override {
+ ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName);
+ return false;
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!ShouldRun)
+ return false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -129,6 +136,8 @@ private:
Name = SP->getName();
return Function::getGUID(Name);
}
+
+ bool ShouldRun = false;
};
} // namespace
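
PseudoProbeInserter now decides once per module, in doInitialization, whether pseudo-probe descriptor metadata is present, and caches the answer in ShouldRun so every runOnMachineFunction can return immediately for modules without probes. A sketch of that caching shape with stand-in Module and pass types; the metadata name mirrors what PseudoProbeDescMetadataName refers to but is hard-coded here:

  #include <iostream>
  #include <set>
  #include <string>

  struct Module {
    std::set<std::string> NamedMetadata;
    bool hasNamedMetadata(const std::string &Name) const {
      return NamedMetadata.count(Name) != 0;
    }
  };

  class PseudoProbeInserterSketch {
    bool ShouldRun = false;             // cached once per module

  public:
    // Runs once per module: a cheap check, remembered for every function.
    bool doInitialization(const Module &M) {
      ShouldRun = M.hasNamedMetadata("llvm.pseudo_probe_desc");
      return false;                     // the module itself is not modified
    }

    // Runs per function: exits immediately when the module has no probes.
    bool runOnFunction(const std::string &Name) {
      if (!ShouldRun)
        return false;
      std::cout << "would insert probes into " << Name << '\n';
      return true;
    }
  };

  int main() {
    Module M;                                        // no probe metadata
    PseudoProbeInserterSketch Pass;
    Pass.doInitialization(M);
    std::cout << Pass.runOnFunction("foo") << '\n';  // prints "0"
  }
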
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
index d92c6a997f31..d704cf7b3213 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -171,7 +171,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
SmallSet<NodeId,32> Defs;
- // Remove all non-phi defs that are not aliased to RefRR, and segregate
+ // Remove all non-phi defs that are not aliased to RefRR, and separate
  // the remaining defs into buckets for containing blocks.
std::map<NodeId, NodeAddr<InstrNode*>> Owners;
std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index c850571da2ed..1264e6021b6e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -30,16 +30,32 @@ static bool isValidRegUse(const MachineOperand &MO) {
return isValidReg(MO) && MO.isUse();
}
-static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) {
- return isValidRegUse(MO) && MO.getReg() == PhysReg;
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegUse(MO))
+ return false;
+ if (MO.getReg() == PhysReg)
+ return true;
+ for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
+ if (MO.getReg() == *R)
+ return true;
+ return false;
}
static bool isValidRegDef(const MachineOperand &MO) {
return isValidReg(MO) && MO.isDef();
}
-static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) {
- return isValidRegDef(MO) && MO.getReg() == PhysReg;
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegDef(MO))
+ return false;
+ if (MO.getReg() == PhysReg)
+ return true;
+ for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
+ if (MO.getReg() == *R)
+ return true;
+ return false;
}
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
@@ -337,7 +353,7 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
return;
for (auto &MO : MI->operands()) {
- if (!isValidRegUseOf(MO, PhysReg))
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
continue;
Uses.insert(&*MI);
@@ -353,7 +369,7 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
for (MachineInstr &MI :
instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
for (auto &MO : MI.operands()) {
- if (!isValidRegUseOf(MO, PhysReg))
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
continue;
if (getReachingDef(&MI, PhysReg) >= 0)
return false;
@@ -381,8 +397,7 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());
SmallPtrSet<MachineBasicBlock*, 4>Visited;
while (!ToVisit.empty()) {
- MachineBasicBlock *MBB = ToVisit.back();
- ToVisit.pop_back();
+ MachineBasicBlock *MBB = ToVisit.pop_back_val();
if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
continue;
if (getLiveInUses(MBB, PhysReg, Uses))
@@ -419,7 +434,7 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
VisitedBBs.insert(MBB);
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return;
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
@@ -469,7 +484,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
LiveRegs.addLiveOuts(*MBB);
// Yes if the register is live out of the basic block.
- if (LiveRegs.contains(PhysReg))
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return true;
// Walk backwards through the block to see if the register is live at some
@@ -477,7 +492,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
for (MachineInstr &Last :
instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
LiveRegs.stepBackward(Last);
- if (LiveRegs.contains(PhysReg))
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
@@ -502,7 +517,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return false;
auto Last = MBB->getLastNonDebugInstr();
@@ -512,7 +527,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
// Finally check that the last instruction doesn't redefine the register.
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return false;
return true;
@@ -523,7 +538,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
MCRegister PhysReg) const {
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return nullptr;
auto Last = MBB->getLastNonDebugInstr();
@@ -532,7 +547,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
int Def = getReachingDef(&*Last, PhysReg);
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return &*Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
@@ -700,7 +715,7 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
if (Ignore.count(&*I))
continue;
for (auto &MO : I->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return false;
}
}
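
isValidRegUseOf and isValidRegDefOf above now take the TargetRegisterInfo and treat an operand as matching when its register is PhysReg or any register aliased to it, walking MCRegAliasIterator. A standalone sketch of the same check driven by an explicit, made-up alias table instead of TRI:

  #include <iostream>
  #include <map>
  #include <set>

  // Stand-in for the target's alias information: each register maps to the
  // set of registers it overlaps (think x86 AX aliasing EAX and RAX).
  static const std::map<unsigned, std::set<unsigned>> AliasTable = {
      {/*AX*/ 1, {/*EAX*/ 2, /*RAX*/ 3}},
      {/*EAX*/ 2, {1, 3}},
      {/*RAX*/ 3, {1, 2}},
  };

  // Match the operand register against PhysReg or anything aliased to it.
  static bool matchesRegOrAlias(unsigned OpReg, unsigned PhysReg) {
    if (OpReg == PhysReg)
      return true;
    auto It = AliasTable.find(PhysReg);
    return It != AliasTable.end() && It->second.count(OpReg) != 0;
  }

  int main() {
    std::cout << matchesRegOrAlias(/*EAX*/ 2, /*RAX*/ 3) << ' '   // 1: overlap
              << matchesRegOrAlias(/*EAX*/ 2, /*AX*/ 1) << ' '    // 1: overlap
              << matchesRegOrAlias(/*EAX*/ 2, /*R8*/ 9) << '\n';  // 0: disjoint
  }
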
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
index b65d58077958..a9816b13e798 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -217,9 +217,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// Collect interferences assigned to any alias of the physical register.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- Q.collectInterferingVRegs();
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ for (auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
new file mode 100644
index 000000000000..85fd3207888b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -0,0 +1,90 @@
+//===- RegAllocEvictionAdvisor.h - Interference resolution ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+
+#include "AllocationOrder.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+using SmallVirtRegSet = SmallSet<Register, 16>;
+
+// Live ranges pass through a number of stages as we try to allocate them.
+// Some of the stages may also create new live ranges:
+//
+// - Region splitting.
+// - Per-block splitting.
+// - Local splitting.
+// - Spilling.
+//
+// Ranges produced by one of the stages skip the previous stages when they are
+// dequeued. This improves performance because we can skip interference checks
+// that are unlikely to give any results. It also guarantees that the live
+// range splitting algorithm terminates, something that is otherwise hard to
+// ensure.
+enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// Live range is in memory. Because of other evictions, it might get moved
+  /// into a register in the end.
+ RS_Memory,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+};
+
+/// Cost of evicting interference - used by default advisor, and the eviction
+/// chain heuristic in RegAllocGreedy.
+// FIXME: this can probably be made an implementation detail of the default
+// advisor, if the eviction chain logic can be refactored.
+struct EvictionCost {
+ unsigned BrokenHints = 0; ///< Total number of broken hints.
+ float MaxWeight = 0; ///< Maximum spill weight evicted.
+
+ EvictionCost() = default;
+
+ bool isMax() const { return BrokenHints == ~0u; }
+
+ void setMax() { BrokenHints = ~0u; }
+
+ void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
+ bool operator<(const EvictionCost &O) const {
+ return std::tie(BrokenHints, MaxWeight) <
+ std::tie(O.BrokenHints, O.MaxWeight);
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
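
EvictionCost in the new header orders candidates lexicographically, broken hints first and maximum evicted spill weight only as a tie-breaker, using std::tie. A minimal runnable illustration of that comparison on its own, with the surrounding allocator omitted:

  #include <iostream>
  #include <tuple>

  struct Cost {
    unsigned BrokenHints = 0;
    float MaxWeight = 0;

    // Lexicographic: broken hints dominate; weight only breaks ties.
    bool operator<(const Cost &O) const {
      return std::tie(BrokenHints, MaxWeight) <
             std::tie(O.BrokenHints, O.MaxWeight);
    }
  };

  int main() {
    Cost A{0, 5.0f};   // breaks no hints but evicts a heavy live range
    Cost B{1, 0.1f};   // breaks a hint but evicts a light live range
    std::cout << (A < B) << ' ' << (B < A) << '\n';   // prints "1 0"
  }
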
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 707161d5a8b0..68920e2e50df 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
@@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
// every definition of it, meaning we can switch all the DBG_VALUEs over
// to just reference the stack slot.
SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
- SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>>
+ SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>
SpilledOperandsMap;
for (MachineOperand *MO : LRIDbgOperands)
SpilledOperandsMap[MO->getParent()].push_back(MO);
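For context on the container change above: a MapVector-style container keeps a vector of entries next to the lookup table, so iteration follows insertion order, while a DenseMap-style hash table iterates in an unspecified order. A rough standalone sketch of that property (illustrative only, not LLVM's SmallMapVector):

#include <cassert>
#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

// Toy insertion-ordered map: lookups go through the index, iteration walks
// the vector, so entries are visited in the order keys were first inserted.
template <typename K, typename V> class OrderedMap {
  std::unordered_map<K, std::size_t> Index;
  std::vector<std::pair<K, V>> Entries;

public:
  V &operator[](const K &Key) {
    auto It = Index.find(Key);
    if (It != Index.end())
      return Entries[It->second].second;
    Index.emplace(Key, Entries.size());
    Entries.emplace_back(Key, V{});
    return Entries.back().second;
  }
  auto begin() { return Entries.begin(); }
  auto end() { return Entries.end(); }
};

int main() {
  OrderedMap<int, int> M;
  M[42] = 1;
  M[7] = 2;
  M[42] = 3; // updates in place, keeps the original position
  std::vector<int> Keys;
  for (auto &KV : M)
    Keys.push_back(KV.first);
  assert((Keys == std::vector<int>{42, 7}));
  return 0;
}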
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4eb12aa30ee9..5a93b58e0baf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
+#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -57,6 +58,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -69,7 +71,6 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -148,7 +149,6 @@ class RAGreedy : public MachineFunctionPass,
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
- using SmallVirtRegSet = SmallSet<Register, 16>;
// context
MachineFunction *MF;
@@ -175,47 +175,6 @@ class RAGreedy : public MachineFunctionPass,
unsigned NextCascade;
std::unique_ptr<VirtRegAuxInfo> VRAI;
- // Live ranges pass through a number of stages as we try to allocate them.
- // Some of the stages may also create new live ranges:
- //
- // - Region splitting.
- // - Per-block splitting.
- // - Local splitting.
- // - Spilling.
- //
- // Ranges produced by one of the stages skip the previous stages when they are
- // dequeued. This improves performance because we can skip interference checks
- // that are unlikely to give any results. It also guarantees that the live
- // range splitting algorithm terminates, something that is otherwise hard to
- // ensure.
- enum LiveRangeStage {
- /// Newly created live range that has never been queued.
- RS_New,
-
- /// Only attempt assignment and eviction. Then requeue as RS_Split.
- RS_Assign,
-
- /// Attempt live range splitting if assignment is impossible.
- RS_Split,
-
- /// Attempt more aggressive live range splitting that is guaranteed to make
- /// progress. This is used for split products that may not be making
- /// progress.
- RS_Split2,
-
- /// Live range will be spilled. No more splitting will be attempted.
- RS_Spill,
-
-
- /// Live range is in memory. Because of other evictions, it might get moved
- /// in a register in the end.
- RS_Memory,
-
- /// There is nothing more we can do to this live range. Abort compilation
- /// if it can't be assigned.
- RS_Done
- };
-
// Enum CutOffStage to keep track of whether the register allocation failed
// because of the cutoffs encountered in last chance recoloring.
// Note: This is used as bitmask. New value should be next power of 2.
@@ -267,25 +226,6 @@ class RAGreedy : public MachineFunctionPass,
}
}
- /// Cost of evicting interference.
- struct EvictionCost {
- unsigned BrokenHints = 0; ///< Total number of broken hints.
- float MaxWeight = 0; ///< Maximum spill weight evicted.
-
- EvictionCost() = default;
-
- bool isMax() const { return BrokenHints == ~0u; }
-
- void setMax() { BrokenHints = ~0u; }
-
- void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
-
- bool operator<(const EvictionCost &O) const {
- return std::tie(BrokenHints, MaxWeight) <
- std::tie(O.BrokenHints, O.MaxWeight);
- }
- };
-
/// EvictionTrack - Keeps track of past evictions in order to optimize region
/// split decision.
class EvictionTrack {
@@ -488,6 +428,8 @@ private:
MCRegister tryAssign(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&,
const SmallVirtRegSet&);
+ MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
+ uint8_t, const SmallVirtRegSet &) const;
MCRegister tryEvict(LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, uint8_t,
const SmallVirtRegSet &);
@@ -760,10 +702,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
- bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
+ (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -785,8 +726,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// interference. Mark a bit to prioritize global above local ranges.
Prio = (1u << 29) + Size;
- if (AddPriorityToGlobal)
- Prio |= RC.AllocationPriority << 24;
+ Prio |= RC.AllocationPriority << 24;
}
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
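The priority computed in enqueue() is a single unsigned with fields packed into bit ranges: bit 31 puts global and local ranges above RS_Split ranges, bit 29 puts global above local, the register class AllocationPriority is ORed in from bit 24, and the size occupies the low bits, so the priority queue's plain integer compare orders on the most significant property first. A toy sketch of that packing (field positions taken from the hunk above, everything else illustrative):

#include <cassert>
#include <cstdint>

static uint32_t makePriority(bool AboveSplit, bool Global, uint32_t ClassPrio,
                             uint32_t Size) {
  uint32_t Prio = Size & ((1u << 24) - 1); // keep the size in the low bits
  Prio |= ClassPrio << 24;                 // register class allocation priority
  if (Global)
    Prio |= 1u << 29;                      // global above local
  if (AboveSplit)
    Prio |= 1u << 31;                      // above RS_Split ranges
  return Prio;
}

int main() {
  // A small global range still outranks a much larger local one.
  assert(makePriority(true, true, 0, 10) > makePriority(true, false, 0, 1000));
  // Anything above the split stage outranks anything below it.
  assert(makePriority(true, false, 0, 1) > makePriority(false, true, 3, 1000));
  return 0;
}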
@@ -860,7 +800,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
return PhysReg;
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
- << Cost << '\n');
+ << (unsigned)Cost << '\n');
MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -957,11 +897,12 @@ bool RAGreedy::canEvictInterference(
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there are 10 or more interferences, chances are one is heavier.
- if (Q.collectInterferingVRegs(10) >= 10)
+ const auto &Interferences = Q.interferingVRegs(10);
+ if (Interferences.size() >= 10)
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ for (LiveInterval *Intf : reverse(Interferences)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
@@ -1039,7 +980,6 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- Q.collectInterferingVRegs();
// Check if any interfering live range is heavier than MaxWeight.
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
@@ -1129,7 +1069,6 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// should be fast, we may need to recalculate when different physregs
// overlap the same register unit and different SubRanges were queried
// against it.
- Q.collectInterferingVRegs();
ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
@@ -1162,17 +1101,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-/// tryEvict - Try to evict all interferences for a physreg.
-/// @param VirtReg Currently unassigned virtual register.
-/// @param Order Physregs to try.
-/// @return Physreg to assign VirtReg, or 0.
-MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs,
- uint8_t CostPerUseLimit,
- const SmallVirtRegSet &FixedRegisters) {
- NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
- TimePassesIsEnabled);
-
+MCRegister RAGreedy::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
BestCost.setMax();
@@ -1230,7 +1161,22 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
if (I.isHint())
break;
}
+ return BestPhys;
+}
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) {
+ NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
+ TimePassesIsEnabled);
+
+ MCRegister BestPhys =
+ tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters);
if (BestPhys.isValid())
evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
@@ -2135,7 +2081,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
// the allocation.
- for (const auto &Use : Uses) {
+ for (const SlotIndex Use : Uses) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
@@ -2462,12 +2408,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
if (NewGaps >= NumGaps) {
- LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
if (IntvMap[I] == 1) {
setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
- LLVM_DEBUG(dbgs() << printReg(LREdit.get(I)));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2506,17 +2452,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SA->analyze(&VirtReg);
- // FIXME: SplitAnalysis may repair broken live ranges coming from the
- // coalescer. That may cause the range to become allocatable which means that
- // tryRegionSplit won't be making progress. This check should be replaced with
- // an assertion when the coalescer is fixed.
- if (SA->didRepairRange()) {
- // VirtReg has changed, so all cached queries are invalid.
- Matrix->invalidateVirtRegs();
- if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
- return PhysReg;
- }
-
// First try to split around a region spanning multiple blocks. RS_Split2
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
@@ -2560,8 +2495,9 @@ bool RAGreedy::mayRecolorAllInterferences(
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there are LastChanceRecoloringMaxInterference or more interferences,
// chances are one would not be recolorable.
- if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
- LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
+ if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
+ LastChanceRecoloringMaxInterference &&
+ !ExhaustiveSearch) {
LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
CutOffInfo |= CO_Interf;
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 751f79e66b73..c847068bca90 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -932,12 +932,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// = B
// Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()),
- UE = MRI->use_end();
- UI != UE;
- /* ++UI is below because of possible MI removal */) {
- MachineOperand &UseMO = *UI;
- ++UI;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
if (UseMO.isUndef())
continue;
MachineInstr *UseMI = UseMO.getParent();
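On the make_early_inc_range change above: the adaptor advances the underlying iterator before the loop body runs, exactly what the removed manual ++UI was doing, so the body may erase the element it is handed (here, possibly the current use's instruction) without invalidating the loop. A small standalone sketch of the same idiom on a std::list; llvm::make_early_inc_range in ADT/STLExtras.h packages it as a range adaptor:

#include <cassert>
#include <list>

int main() {
  std::list<int> L{1, 2, 3, 4, 5};
  // Early-increment loop: remember the element, advance first, then possibly
  // erase the remembered element. Erasing a list node only invalidates
  // iterators to that node, so It stays valid.
  for (auto It = L.begin(); It != L.end();) {
    auto Cur = It++;
    if (*Cur % 2 == 0)
      L.erase(Cur);
  }
  assert((L == std::list<int>{1, 3, 5}));
  return 0;
}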
@@ -1573,9 +1569,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
// to describe DstReg instead.
if (MRI->use_nodbg_empty(SrcReg)) {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
- UI != MRI->use_end();) {
- MachineOperand &UseMO = *UI++;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugInstr()) {
if (Register::isPhysicalRegister(DstReg))
@@ -3708,7 +3703,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
// vreg => DbgValueLoc map.
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
for (auto *X : ToInsert) {
- for (auto Op : X->debug_operands()) {
+ for (const auto &Op : X->debug_operands()) {
if (Op.isReg() && Op.getReg().isVirtual())
DbgVRegToValues[Op.getReg()].push_back({Slot, X});
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index e35cf7aa6958..c0a07ec4c91d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -495,21 +495,20 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
// Spill the scavenged register before \p Before.
int FI = Scavenged[SI].FrameIndex;
if (FI < FIB || FI >= FIE) {
- std::string Msg = std::string("Error while trying to spill ") +
- TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) +
- ": Cannot scavenge register without an emergency spill slot!";
- report_fatal_error(Msg.c_str());
+ report_fatal_error(Twine("Error while trying to spill ") +
+ TRI->getName(Reg) + " from class " +
+ TRI->getRegClassName(&RC) +
+ ": Cannot scavenge register without an emergency "
+ "spill slot!");
}
- TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex,
- &RC, TRI);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI);
MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex,
- &RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI);
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 1619381967c4..0ff045fa787e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -70,7 +70,7 @@ static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
// Replace the call to the vector intrinsic with a call
// to the corresponding function from the vector library.
IRBuilder<> IRBuilder(&CI);
- SmallVector<Value *> Args(CI.arg_operands());
+ SmallVector<Value *> Args(CI.args());
// Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
@@ -106,7 +106,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// all vector operands have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
SmallVector<Type *> ScalarTypes;
- for (auto Arg : enumerate(CI.arg_operands())) {
+ for (auto Arg : enumerate(CI.args())) {
auto *ArgType = Arg.value()->getType();
// Vector calls to intrinsics can still have
// scalar operands for specific arguments.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 94add920f284..50d9d64bfcfd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -147,7 +147,7 @@ class SafeStack {
///
/// 16 seems like a reasonable upper bound on the alignment of objects that we
/// might expect to appear on the stack on most common targets.
- enum { StackAlignment = 16 };
+ static constexpr uint64_t StackAlignment = 16;
/// Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -221,6 +221,8 @@ public:
bool run();
};
+constexpr uint64_t SafeStack::StackAlignment;
+
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
@@ -519,7 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
StackLayout SSL(StackAlignment);
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
- unsigned Align =
+ uint64_t Align =
std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
@@ -532,8 +534,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty),
- Arg->getParamAlignment());
+ uint64_t Align =
+ std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment());
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -544,21 +546,20 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align =
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment());
+ uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment());
SSL.addObject(AI, Size, Align,
ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
}
SSL.computeLayout();
- unsigned FrameAlignment = SSL.getFrameAlignment();
+ uint64_t FrameAlignment = SSL.getFrameAlignment();
// FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location
// (AlignmentSkew).
if (FrameAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
- assert(isPowerOf2_32(FrameAlignment));
+ assert(isPowerOf2_64(FrameAlignment));
IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
@@ -676,9 +677,9 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
- unsigned Align = std::max(
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
- (unsigned)StackAlignment);
+ uint64_t Align =
+ std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
+ StackAlignment);
assert(isPowerOf2_32(Align));
Value *NewTop = IRB.CreateIntToPtr(
@@ -701,9 +702,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (!DynamicAllocas.empty()) {
// Now go through the instructions again, replacing stacksave/stackrestore.
- for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
- Instruction *I = &*(It++);
- auto II = dyn_cast<IntrinsicInst>(I);
+ for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
index 5d61b3a146b4..7cdda7743c16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
}
}
-void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
+void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
const StackLifetime::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
ObjectAlignments[V] = Alignment;
@@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
}
static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
- unsigned Alignment) {
+ uint64_t Alignment) {
return alignTo(Offset + Size, Alignment) - Size;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
index f0db1b42aa00..b72450e57080 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
@@ -22,7 +22,7 @@ namespace safestack {
/// Compute the layout of an unsafe stack frame.
class StackLayout {
- unsigned MaxAlignment;
+ uint64_t MaxAlignment;
struct StackRegion {
unsigned Start;
@@ -39,23 +39,24 @@ class StackLayout {
struct StackObject {
const Value *Handle;
- unsigned Size, Alignment;
+ unsigned Size;
+ uint64_t Alignment;
StackLifetime::LiveRange Range;
};
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
- DenseMap<const Value *, unsigned> ObjectAlignments;
+ DenseMap<const Value *, uint64_t> ObjectAlignments;
void layoutObject(StackObject &Obj);
public:
- StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+ StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
- void addObject(const Value *V, unsigned Size, unsigned Alignment,
+ void addObject(const Value *V, unsigned Size, uint64_t Alignment,
const StackLifetime::LiveRange &Range);
/// Run the layout computation for all previously added objects.
@@ -65,13 +66,13 @@ public:
unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
/// Returns the alignment of the object
- unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+ uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
/// Returns the size of the entire frame.
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
/// Returns the alignment of the frame.
- unsigned getFrameAlignment() { return MaxAlignment; }
+ uint64_t getFrameAlignment() { return MaxAlignment; }
void print(raw_ostream &OS);
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
index 60f8eec1b9bc..ef3afab2b730 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -577,8 +577,7 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
SU = WorkList.back();
WorkList.pop_back();
Visited.set(SU->NodeNum);
- for (const SDep &SuccDep
- : make_range(SU->Succs.rbegin(), SU->Succs.rend())) {
+ for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
unsigned s = SuccDep.getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index daff3af3bc3c..3f013eb6024e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -271,15 +271,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
} else {
Dep.setLatency(0);
- // FIXME: We could always let target to adjustSchedDependency(), and
- // remove this condition, but that currently asserts in Hexagon BE.
- if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
}
-
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
UseSU->addPred(Dep);
}
}
@@ -1117,7 +1112,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
LiveRegs.addLiveOuts(MBB);
// Examine block from end to start...
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
if (MI.isDebugOrPseudoInstr())
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dc245f0d7b16..ce400ea43f29 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable reducing the width of load/op/store "
+ cl::desc("DAG combiner enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable load/<replace bytes>/store with "
+ cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
@@ -319,7 +319,7 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ APInt DemandedBits = APInt::getAllOnes(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
@@ -345,7 +345,7 @@ namespace {
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
@@ -436,7 +436,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
- SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -515,6 +515,7 @@ namespace {
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
+ SDValue visitVPOp(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
@@ -615,7 +616,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(SDNode *Op0, SDNode *Op1) const;
+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
+ return DAG.getNode(Opc, DL, VT, N00, OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ return SDValue();
}
}
return SDValue();
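Both reassociations above rely only on the opcode being associative and commutative: (op (op x, c1), c2) folds the two constants together, and (op (op x, c1), y) moves the constant outermost so a later visit can fold it with whatever y contributes. A tiny exhaustive check of those equalities for 8-bit add, standing in for any associative and commutative op:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C1 = 0; C1 < 256; ++C1)
      for (unsigned C2 = 0; C2 < 256; ++C2) {
        // (x + c1) + c2  ==  x + (c1 + c2)
        assert(uint8_t((X + C1) + C2) == uint8_t(X + (C1 + C2)));
        // (x + c1) + y  ==  (x + y) + c1, here with y = c2
        assert(uint8_t((X + C1) + C2) == uint8_t((X + C2) + C1));
      }
  return 0;
}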
@@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
+#include "llvm/IR/VPIntrinsics.def"
+ return visitVPOp(N);
}
return SDValue();
}
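The two added preprocessor lines use the usual .def x-macro pattern: VPIntrinsics.def expands BEGIN_REGISTER_VP_SDNODE once per VP opcode, so the temporary #define turns every entry into a case ISD::...: label that falls through to the shared visitVPOp handler. A self-contained sketch of the pattern, with a made-up op list since the real one lives in the .def file:

#include <cassert>
#include <string>

// Stand-in for the .def file: one X(...) entry per operation.
#define FOR_EACH_VP_OP(X) X(VP_ADD) X(VP_SUB) X(VP_MUL)

enum Opcode {
#define DECLARE_OP(Name) Name,
  FOR_EACH_VP_OP(DECLARE_OP)
#undef DECLARE_OP
  OTHER
};

// Same shape as the DAGCombiner change: every generated case label falls
// through to a single shared handler.
static std::string visit(Opcode Op) {
  switch (Op) {
#define HANDLE_OP(Name) case Name:
  FOR_EACH_VP_OP(HANDLE_OP)
#undef HANDLE_OP
    return "visitVPOp";
  default:
    return "other";
  }
}

int main() {
  assert(visit(VP_MUL) == "visitVPOp");
  assert(visit(OTHER) == "other");
  return 0;
}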
@@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
@@ -2781,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- IsFlip = Const->isAllOnesValue();
+ IsFlip = Const->isAllOnes();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
@@ -3257,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
@@ -3315,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
- !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
- TLI.expandABS(N1.getNode(), Result, DAG, true))
- return Result;
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
+ return Result;
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
@@ -3783,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3808,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1.isNullValue())
+ if (N1IsConst && ConstValue1.isZero())
return N1;
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue())
+ if (N1IsConst && ConstValue1.isOne())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ if (N1IsConst && ConstValue1.isAllOnes()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
@@ -3837,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
@@ -3966,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
- if (!V || V->isNullValue()) {
+ if (!V || V->isZero()) {
ClearMask.push_back(true);
return true;
}
@@ -4052,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Op0.getNode()->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4111,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isNullValue())
+ if (N0C && N0C->isZero())
return N0;
// X / X -> 1
@@ -4136,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
@@ -4204,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
- if (C->isNullValue() || C->isOpaque())
+ if (C->isZero() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
- if ((-C->getAPIntValue()).isPowerOf2())
+ if (C->getAPIntValue().isNegatedPowerOf2())
return true;
return false;
};
@@ -4281,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
- if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
@@ -4391,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (!isSigned && N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
@@ -4475,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4527,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4567,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
}
}
+ // Simplify the operands using demanded-bits information.
+ // We don't have demanded bits support for MULHU so this just enables constant
+ // folding based on known bits.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4768,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold operation with constant operands.
- if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
// If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -4797,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
- return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
@@ -5607,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
+/// For targets that support usubsat, match a bit-hack form of that operation
+/// that ends in 'and' and convert it.
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // Canonicalize SRA as operand 1.
+ if (N0.getOpcode() == ISD::SRA)
+ std::swap(N0, N1);
+
+ // xor/add with SMIN (signmask) are logically equivalent.
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||
+ !SraC || SraC->getAPIntValue() != BitWidth - 1)
+ return SDValue();
+
+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
+ SDLoc DL(N);
+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
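The comment in foldAndToUsubsat above claims that (i8 X ^ 128) & (i8 X s>> 7) and (i8 X + 128) & (i8 X s>> 7) both compute usubsat(X, 128); that is easy to verify exhaustively. A standalone brute-force check over all i8 values, with the sign splat written out explicitly instead of as an arithmetic shift to keep the C++ portable:

#include <cassert>
#include <cstdint>

// usubsat(x, y): unsigned subtraction that saturates at zero.
static uint8_t usubsat8(uint8_t X, uint8_t Y) {
  return X > Y ? uint8_t(X - Y) : uint8_t(0);
}

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = uint8_t(V);
    // i8 X s>> 7 splats the sign bit: 0xFF when X >= 128, 0x00 otherwise.
    uint8_t SignSplat = (X & 0x80) ? 0xFF : 0x00;
    uint8_t XorForm = uint8_t((X ^ 0x80u) & SignSplat);
    uint8_t AddForm = uint8_t((X + 0x80u) & SignSplat);
    assert(XorForm == usubsat8(X, 0x80));
    assert(AddForm == usubsat8(X, 0x80));
  }
  return 0;
}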
@@ -5618,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
@@ -5679,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(BitWidth)))
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -5742,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
- APInt Constant = APInt::getNullValue(1);
+ APInt Constant = APInt::getZero(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
@@ -5773,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnesValue(EltBitWidth);
+ Constant = APInt::getAllOnes(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
@@ -5800,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
case ISD::NON_EXTLOAD: B = true; break;
}
- if (B && Constant.isAllOnesValue()) {
+ if (B && Constant.isAllOnes()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
@@ -5970,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+ if (hasOperation(ISD::USUBSAT, VT))
+ if (SDValue V = foldAndToUsubsat(N, DAG))
+ return V;
+
return SDValue();
}
@@ -6384,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
@@ -6925,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
- // Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
// The target must have at least one rotate/funnel flavor.
+ // We still try to match rotate by constant pre-legalization.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
- if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
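For reference on what MatchRotate recognizes: two opposing shifts of the same value whose constant amounts sum to the bit width form a rotate, e.g. for i8, (x << 3) | (x >> 5) == rotl(x, 3), which is the by-constant case the hunk above now also tries to match pre-legalization. A quick exhaustive check of that identity (standalone sketch, not the DAG code):

#include <cassert>
#include <cstdint>

static uint8_t rotl8(uint8_t X, unsigned C) {
  C &= 7;
  return C ? uint8_t((X << C) | (X >> (8 - C))) : X;
}

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = uint8_t(V);
    for (unsigned C = 1; C < 8; ++C) {
      // Opposing shift amounts that sum to the bit width form a rotate.
      uint8_t ShiftPair = uint8_t((X << C) | (X >> (8 - C)));
      assert(ShiftPair == rotl8(X, C));
    }
  }
  return 0;
}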
// Check for truncated rotate.
@@ -6988,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
@@ -7016,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
- if (IsRotate && (HasROTL || HasROTR))
- Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt);
- else
- Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -7045,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return Res;
}
+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
+ // shift.
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
@@ -7296,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations)
+ if (LegalOperations || OptLevel == CodeGenOpt::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -7671,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// | D |
// Into:
// (x & m) | (y & ~m)
-// If y is a constant, and the 'andn' does not work with immediates,
-// we unfold into a different pattern:
+// If y is a constant, m is not a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
// ~(~x & m) & (m | y)
+// If x is a constant, m is a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// (x | ~m) & ~(~m & ~y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
@@ -7728,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDLoc DL(N);
- // If Y is a constant, check that 'andn' works with immediates.
- if (!TLI.hasAndNot(Y)) {
+ // If Y is a constant, check that 'andn' works with immediates. Unless M is
+ // a bitwise not that would already allow ANDN to be used.
+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
@@ -7739,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
+ // If X is a constant and M is a bitwise not, check that 'andn' works with
+ // immediates.
+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotM = M.getOperand(0);
+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
+ SDValue NotY = DAG.getNOT(DL, Y, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
+ }
+
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
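The three masked-merge shapes in the comments above are bitwise identities: (x & m) | (y & ~m) equals ~(~x & m) & (m | y) and also (x | ~m) & ~(~m & ~y), which is why the unfold can pick whichever shape keeps an 'andn' usable for the constant-operand cases. Because the identity is bitwise, checking every byte combination settles it; a standalone brute-force check:

#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      for (unsigned M = 0; M < 256; ++M) {
        unsigned Merge = ((X & M) | (Y & ~M)) & 0xFF;    // (x & m) | (y & ~m)
        unsigned FormA = (~(~X & M) & (M | Y)) & 0xFF;   // ~(~x & m) & (m | y)
        unsigned FormB = ((X | ~M) & ~(~M & ~Y)) & 0xFF; // (x | ~m) & ~(~m & ~y)
        assert(FormA == Merge && FormB == Merge);
      }
  return 0;
}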
@@ -7750,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
@@ -7764,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
- SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7899,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
- APInt Ones = APInt::getAllOnesValue(BitWidth);
+ APInt Ones = APInt::getAllOnes(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
@@ -8222,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
@@ -8255,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSel;
// if (shl x, c) is known to be zero, return 0
- if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
@@ -8501,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
+
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
- if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ if (!IsSignExt && !IsZeroExt)
return SDValue();
- EVT WideVT1 = LeftOp.getValueType();
- EVT WideVT2 = RightOp.getValueType();
- (void)WideVT2;
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+
+ SDValue MulhRightOp;
+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
+ unsigned ActiveBits = IsSignExt
+ ? Constant->getAPIntValue().getMinSignedBits()
+ : Constant->getAPIntValue().getActiveBits();
+ if (ActiveBits > NarrowVTSize)
+ return SDValue();
+ MulhRightOp = DAG.getConstant(
+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
+ NarrowVT);
+ } else {
+ if (LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+ MulhRightOp = RightOp.getOperand(0);
+ }
+
+ EVT WideVT = LeftOp.getValueType();
// Proceed with the transformation if the wide types match.
- assert((WideVT1 == WideVT2) &&
+ assert((WideVT == RightOp.getValueType()) &&
"Cannot have a multiply node with two different operand types.");
- EVT NarrowVT = LeftOp.getOperand(0).getValueType();
- // Check that the two extend nodes are the same type.
- if (NarrowVT != RightOp.getOperand(0).getValueType())
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
- unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
- if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
@@ -8540,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
- SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
- RightOp.getOperand(0));
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
- : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+ SDValue Result =
+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -8563,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8761,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8774,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSel;
// if (srl x, c) is known to be zero, return 0
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (N1C &&
+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
@@ -9357,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isNullValue() && C2->isOne()) {
+ if (C1->isZero() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isNullValue() && C2->isAllOnesValue()) {
+ if (C1->isZero() && C2->isAllOnes()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isOne() && C2->isNullValue()) {
+ if (C1->isOne() && C2->isZero()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
- if (C1->isAllOnesValue() && C2->isNullValue()) {
+ if (C1->isAllOnes() && C2->isZero()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -9405,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
@@ -9433,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
- C1->isNullValue() && C2->isOne()) {
+ C1->isZero() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
@@ -9478,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Cond0 = N0.getOperand(0);
+ SDValue Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (VT != Cond0.getValueType())
+ return SDValue();
+
+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
+ // compare is inverted from that pattern ("Cond0 s> -1").
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
+ ; // This is the pattern we are looking for.
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
+ std::swap(N1, N2);
+ else
+ return SDValue();
+
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ if (isNullOrNullSplat(N2)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ }
+
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ if (isAllOnesOrAllOnesSplat(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ }
+
+ // If we have to invert the sign bit mask, only do that transform if the
+ // target has a bitwise 'and not' instruction (the invert is free).
+ // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ SDValue Not = DAG.getNOT(DL, Sra, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ }
+
+ // TODO: There's another pattern in this family, but it may require
+ // implementing hasOrNot() to check for profitability:
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+
+ return SDValue();
+}
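[editor's note] A hedged scalar illustration of the new fold, assuming 32-bit lanes and an arithmetic right shift of signed values (guaranteed since C++20, and what SRA models); the values are illustrative only:

#include <cassert>
#include <cstdint>

// Scalar model of foldVSelectToSignBitSplatMask: splat the sign bit of X
// with an arithmetic shift and use the splat as an AND / OR mask.
int main() {
  const int32_t Vals[] = {-7, -1, 0, 1, 42};
  const int32_t N1 = 0x1234, N2 = 0x0ff0;
  for (int32_t X : Vals) {
    int32_t SignSplat = X >> 31;                        // 0 or -1 (all ones)
    assert(((X < 0) ? N1 : 0) == (SignSplat & N1));     // (x s< 0) ? N1 : 0
    assert(((X < 0) ? -1 : N2) == (SignSplat | N2));    // (x s< 0) ? -1 : N2
    assert(((X < 0) ? 0 : N2) == (~SignSplat & N2));    // (x s< 0) ? 0 : N2
  }
  return 0;
}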
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9702,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
- BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
- TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
@@ -10168,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectOfConstants(N))
return V;
+ if (hasOperation(ISD::SRA, VT))
+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10189,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isNullValue())
+ if (!SCCC->isZero())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
@@ -10247,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isNullValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
@@ -10862,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -11256,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
Known = DAG.computeKnownBits(Op);
- return (Known.Zero | 1).isAllOnesValue();
+ return (Known.Zero | 1).isAllOnes();
}
/// Given an extending node with a pop-count operand, if the target does not
@@ -12015,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
+ if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -12031,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
+ if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -12051,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
- ExtVTBits))) &&
+ DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
@@ -12289,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -12537,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
- LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR always has the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load, we
@@ -12546,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
+ !LD1->hasOneUse() || !LD2->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
+
+ bool LD1Fast = false;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
- if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
- DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- Align Alignment = LD1->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign <= Alignment &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Alignment);
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
return SDValue();
}
@@ -12937,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
- SDLoc DL(BV);
-
// Okay, we know the src/dst types are both integers of differing types.
- // Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
- if (SrcBitSize < DstBitSize) {
- unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e;
- i += NumInputsPerOutput) {
- bool isLE = DAG.getDataLayout().isLittleEndian();
- APInt NewBits = APInt(DstBitSize, 0);
- bool EltIsUndef = true;
- for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
- // Shift the previously computed bits over.
- NewBits <<= SrcBitSize;
- SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
- if (Op.isUndef()) continue;
- EltIsUndef = false;
-
- NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
- zextOrTrunc(SrcBitSize).zext(DstBitSize);
- }
+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
+ // BuildVectorSDNode?
+ auto *BVN = cast<BuildVectorSDNode>(BV);
- if (EltIsUndef)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
- else
- Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
- }
-
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ // Extract the constant raw bit data.
+ BitVector UndefElements;
+ SmallVector<APInt> RawBits;
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
+ return SDValue();
- // Finally, this must be the case where we are shrinking elements: each input
- // turns into multiple outputs.
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- NumOutputsPerInput*BV->getNumOperands());
+ SDLoc DL(BV);
SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
+ if (UndefElements[I])
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
+ }
- for (const SDValue &Op : BV->op_values()) {
- if (Op.isUndef()) {
- Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
- continue;
- }
-
- APInt OpVal = cast<ConstantSDNode>(Op)->
- getAPIntValue().zextOrTrunc(SrcBitSize);
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+}
- for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal.lshrInPlace(DstBitSize);
- }
+// Returns true if floating point contraction is allowed on the FMUL-SDValue
+// `N`
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
- // For big endian targets, swap the order of the pieces of each element.
- if (DAG.getDataLayout().isBigEndian())
- std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
- }
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
- return DAG.getBuildVector(VT, DL, Ops);
+// Returns true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
}
/// Try to perform FMA combining on a given FADD node.
@@ -13038,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
@@ -13069,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
- if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanReassociate && isFusedOp(N0) &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
- } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ } else if (CanReassociate && isFusedOp(N1) &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
@@ -13130,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13160,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13174,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N1)) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
@@ -13195,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N10)) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13391,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
@@ -13409,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N1) &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
@@ -13423,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0->hasOneUse()) {
+ if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13450,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13470,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -13495,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -13537,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
- if (!Options.NoInfsFPMath)
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isContractableFMUL(Options, SDValue(N, 0)) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
@@ -13632,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
@@ -13840,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
@@ -13925,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
}
@@ -13970,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
- // fold (fmul X, -1.0) -> (fneg X)
- if (N1CFP && N1CFP->isExactlyValue(-1.0))
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N0);
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
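[editor's note] The switch from FNEG to FSUB keeps the zero-sign behaviour of the original multiply; a small standalone check (assuming IEEE-754 doubles and round-to-nearest) of the cases where the two forms could plausibly differ:

#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {+0.0, -0.0, 1.5, -2.25};
  for (double X : Vals) {
    double Mul = X * -1.0;   // original pattern
    double Sub = -0.0 - X;   // folded form
    // Values match, including the sign of zero (== alone cannot see it).
    assert(Mul == Sub && std::signbit(Mul) == std::signbit(Sub));
  }
  return 0;
}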
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
@@ -14259,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
@@ -16244,11 +16381,12 @@ struct LoadedSlice {
return false;
// Check if it will be merged with the load.
- // 1. Check the alignment constraint.
- Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
- ResVT.getTypeForEVT(*DAG->getContext()));
-
- if (RequiredAlignment > getAlign())
+ // 1. Check the alignment / fast memory access constraint.
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
+ Origin->getAddressSpace(), getAlign(),
+ Origin->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return false;
// 2. Check that the load is a legal operation for that type.
@@ -16269,7 +16407,7 @@ struct LoadedSlice {
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
- if (UsedBits.isAllOnesValue())
+ if (UsedBits.isAllOnes())
return true;
// Get rid of the unused bits on the right.
@@ -16278,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
- return NarrowedUsedBits.isAllOnesValue();
+ return NarrowedUsedBits.isAllOnes();
}
/// Check whether or not \p First and \p Second are next to each other
@@ -16696,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
- Imm ^= APInt::getAllOnesValue(BitWidth);
- if (Imm == 0 || Imm.isAllOnesValue())
+ Imm ^= APInt::getAllOnes(BitWidth);
+ if (Imm == 0 || Imm.isAllOnes())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
@@ -16724,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
- NewImm ^= APInt::getAllOnesValue(NewBW);
+ NewImm ^= APInt::getAllOnes(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+ bool IsFast = false;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
- Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return SDValue();
SDValue NewPtr =
@@ -16787,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
+ bool FastLD = false, FastST = false;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
- !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
- return SDValue();
-
- Align LDAlign = LD->getAlign();
- Align STAlign = ST->getAlign();
- Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
- if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *LD->getMemOperand(), &FastLD) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *ST->getMemOperand(), &FastST) ||
+ !FastLD || !FastST)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LDAlign);
+ LD->getPointerInfo(), LD->getAlign());
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
- ST->getPointerInfo(), STAlign);
+ ST->getPointerInfo(), ST->getAlign());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
@@ -16838,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &ConstNode) {
APInt Val;
- // If the add only has one use, this would be OK to do.
- if (AddNode.getNode()->hasOneUse())
+ // If the add only has one use, and the target thinks the folding is
+ // profitable or does not lead to worse code, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse() &&
+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
// Walk all the users of the constant with which we're multiplying.
@@ -16931,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ Optional<MachineMemOperand::Flags> Flags;
+ AAMDNodes AAInfo;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ if (!Flags) {
+ Flags = St->getMemOperand()->getFlags();
+ AAInfo = St->getAAInfo();
+ continue;
+ }
+ // Skip merging if there's an inconsistent flag.
+ if (Flags != St->getMemOperand()->getFlags())
+ return false;
+ // Concatenate AA metadata.
+ AAInfo = AAInfo.concat(St->getAAInfo());
+ }
+
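[editor's note] The flag-collection loop follows a common merge-or-bail pattern; a standalone sketch of the same idea (the name commonFlags and the plain uint32_t flags are illustrative, not the DAG types):

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

// Return the common flag value of all stores, or nullopt on any mismatch.
std::optional<uint32_t> commonFlags(const std::vector<uint32_t> &StoreFlags) {
  std::optional<uint32_t> Flags;
  for (uint32_t F : StoreFlags) {
    if (!Flags) {
      Flags = F;             // first store seeds the result
      continue;
    }
    if (*Flags != F)
      return std::nullopt;   // inconsistent flag: caller skips the merge
  }
  return Flags;
}

int main() {
  assert(commonFlags({3, 3, 3}).value() == 3);
  assert(!commonFlags({3, 1, 3}).has_value());
  return 0;
}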
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
@@ -17048,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore =
- DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstInChain->getAlign());
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17062,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
}
// Replace all merged stores with the new store.
@@ -17359,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
+ IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
@@ -17378,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
break;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17390,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17409,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17485,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17633,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
bool IsFastSt = false;
bool IsFastLd = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ // Don't try vector types if we need a rotate. We may still fail the
+ // legality checks for the integer type, but we can't handle the rotate
+ // case with vectors.
+ // FIXME: We could use a shuffle in place of the rotate.
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17648,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17662,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
@@ -18214,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
- if (!isAlias(Chain.getNode(), N))
+ if (!mayAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
@@ -18592,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!VecEltVT.isByteSized())
return SDValue();
- Align Alignment = OriginalLoad->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VecEltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Alignment ||
- !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
- return SDValue();
-
- ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
- ISD::NON_EXTLOAD : ISD::EXTLOAD;
- if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Alignment = NewAlign;
-
+ Align Alignment = OriginalLoad->getAlign();
MachinePointerInfo MPI;
SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
+
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
@@ -18863,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
@@ -18876,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
- APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
@@ -19671,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
+ // Offset must be a constant multiple of the
+ // known-minimum vector length of the result type.
int64_t Offset = checkElem(Op0);
- if (Offset < 0)
+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
@@ -19843,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
+// Attempt to merge nested concat_vectors/undefs.
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
+ EVT SubVT;
+ SDValue FirstConcat;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef())
+ continue;
+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ if (!FirstConcat) {
+ SubVT = Op.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+ FirstConcat = Op;
+ continue;
+ }
+ if (SubVT != Op.getOperand(0).getValueType())
+ return SDValue();
+ }
+ assert(FirstConcat && "Concat of all-undefs found");
+
+ SmallVector<SDValue> ConcatOps;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef()) {
+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
+ continue;
+ }
+ ConcatOps.append(Op->op_begin(), Op->op_end());
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
+}
+
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
@@ -20102,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
- // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ }
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
@@ -20350,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
return SDValue();
// Allow targets to opt-out.
@@ -20362,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (!VT.isByteSized())
return SDValue();
- unsigned Index = ExtIdx->getZExtValue();
+ unsigned Index = Extract->getConstantOperandVal(1);
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
@@ -20491,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
- if (ConcatSrcNumElts == ExtNumElts)
+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple length of this extract,
@@ -20499,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+ ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
@@ -21134,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
- if (N0 == N1) {
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (Idx >= (int)NumElts) Idx -= NumElts;
- NewMask.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
- }
+ if (N0 == N1)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+ createUnaryMask(SVN->getMask(), NumElts));
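[editor's note] createUnaryMask folds indices that point at the (identical) second input back onto the first; a scalar sketch of that mask rewrite, assuming the usual -1 sentinel for undef lanes (makeUnaryMask is an illustrative name):

#include <cassert>
#include <vector>

// Map indices that select from the second input back to the first input.
std::vector<int> makeUnaryMask(const std::vector<int> &Mask, int NumElts) {
  std::vector<int> Out;
  for (int Idx : Mask)
    Out.push_back(Idx >= NumElts ? Idx - NumElts : Idx); // -1 (undef) stays -1
  return Out;
}

int main() {
  // shuffle v, v, <0,5,-1,3> over 4 elts --> shuffle v, undef, <0,1,-1,3>
  assert(makeUnaryMask({0, 5, -1, 3}, 4) == std::vector<int>({0, 1, -1, 3}));
  return 0;
}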
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
@@ -21293,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // See if we can replace a shuffle with an insert_subvector.
+ // e.g. v2i32 into v8i32:
+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+ // --> insert_subvector(lhs,rhs1,4).
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+ // Ensure RHS subvectors are legal.
+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+ EVT SubVT = RHS.getOperand(0).getValueType();
+ int NumSubVecs = RHS.getNumOperands();
+ int NumSubElts = SubVT.getVectorNumElements();
+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+ if (!TLI.isTypeLegal(SubVT))
+ return SDValue();
+
+      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+ return SDValue();
+
+ // Search [NumSubElts] spans for RHS sequence.
+ // TODO: Can we avoid nested loops to increase performance?
+ SmallVector<int> InsertionMask(NumElts);
+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+ // Reset mask to identity.
+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+ // Add subvector insertion.
+ std::iota(InsertionMask.begin() + SubIdx,
+ InsertionMask.begin() + SubIdx + NumSubElts,
+ NumElts + (SubVec * NumSubElts));
+
+ // See if the shuffle mask matches the reference insertion mask.
+ bool MatchingShuffle = true;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int ExpectIdx = InsertionMask[i];
+ int ActualIdx = Mask[i];
+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+ MatchingShuffle = false;
+ break;
+ }
+ }
+
+ if (MatchingShuffle)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+ RHS.getOperand(SubVec),
+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+ }
+ }
+ return SDValue();
+ };
+ ArrayRef<int> Mask = SVN->getMask();
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+ return InsertN1;
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommuteMask);
+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+ return InsertN0;
+ }
+ }
+
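[editor's note] The reference insertion mask built with std::iota can be sketched on its own; insertionMask is an illustrative helper and the sizes match the v2i32-into-v8i32 example in the comment above:

#include <cassert>
#include <numeric>
#include <vector>

// Identity mask over NumElts lanes, with NumSubElts lanes taken from RHS
// subvector SubVec (lane numbers NumElts + SubVec*NumSubElts ...) spliced
// in at offset SubIdx.
std::vector<int> insertionMask(int NumElts, int NumSubElts, int SubVec,
                               int SubIdx) {
  std::vector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  std::iota(Mask.begin() + SubIdx, Mask.begin() + SubIdx + NumSubElts,
            NumElts + SubVec * NumSubElts);
  return Mask;
}

int main() {
  // Matches the <0,1,2,3,10,11,6,7> mask: rhs1 inserted at lane 4 of v8i32.
  assert(insertionMask(8, 2, 1, 4) ==
         std::vector<int>({0, 1, 2, 3, 10, 11, 6, 7}));
  return 0;
}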
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -21862,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+ // VP operations in which all vector elements are disabled - either by
+ // determining that the mask is all false or that the EVL is 0 - can be
+ // eliminated.
+ bool AreAllEltsDisabled = false;
+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+ AreAllEltsDisabled |=
+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+ // This is the only generic VP combine we support for now.
+ if (!AreAllEltsDisabled)
+ return SDValue();
+
+ // Binary operations can be replaced by UNDEF.
+ if (ISD::isVPBinaryOp(N->getOpcode()))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ // VP Memory operations can be replaced by either the chain (stores) or the
+ // chain + undef (loads).
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+ if (MemSD->writeMem())
+ return MemSD->getChain();
+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+ }
+
+ // Reduction operations return the start operand when no elements are active.
+ if (ISD::isVPReduction(N->getOpcode()))
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
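[editor's note] A scalar model of why the all-disabled cases are safe to fold, using the reduction case (vpReduceAdd, the loop, and the types are illustrative only; the real semantics live in the VP intrinsics):

#include <cassert>
#include <cstdint>
#include <vector>

// Model of a VP add reduction: only lanes below EVL with a true mask bit
// participate; with EVL == 0 or an all-false mask, the start value survives.
int64_t vpReduceAdd(int64_t Start, const std::vector<int32_t> &Vec,
                    const std::vector<bool> &Mask, unsigned EVL) {
  int64_t Acc = Start;
  for (unsigned I = 0; I < EVL && I < Vec.size(); ++I)
    if (Mask[I])
      Acc += Vec[I];
  return Acc;
}

int main() {
  std::vector<int32_t> V = {1, 2, 3, 4};
  std::vector<bool> AllFalse(4, false), AllTrue(4, true);
  assert(vpReduceAdd(100, V, AllTrue, 0) == 100);   // EVL == 0
  assert(vpReduceAdd(100, V, AllFalse, 4) == 100);  // mask all false
  return 0;
}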
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -21918,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
- if (Bits.isAllOnesValue())
+ if (Bits.isAllOnes())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
@@ -21953,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -21974,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
- SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
@@ -21998,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
}
/// Visit a binary vector operation, like ADD.
-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVBinOp only works on vectors!");
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
- EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
+ LHS.getValueType(), Ops))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
@@ -22029,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
- SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
@@ -22046,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
- SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22056,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
- SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22080,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
- SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
@@ -22107,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
- SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
@@ -22120,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
return V;
return SDValue();
@@ -22434,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
- if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
+ // The use checks are intentionally on SDNode because we may be dealing
+ // with opcodes that produce more than one SDValue.
+ // TODO: Do we really need to check N0 (the condition operand of the select)?
+ // But removing that clause could cause an infinite loop...
+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
return SDValue();
+ // Binops may include opcodes that return multiple values, so all values
+ // must be created/propagated from the newly created binops below.
+ SDVTList OpVTs = N1->getVTList();
+
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22456,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22584,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
- return !(SCCC->isNullValue()) ? N2 : N3;
+ return !(SCCC->isZero()) ? N2 : N3;
}
}
@@ -22683,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
@@ -22710,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
+ if (!NotExtCompare && N1C && N2C && N3C &&
+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||
+ (N1C->isZero() && CC == ISD::SETLT)) &&
+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASR = DAG.getNode(
+ ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
+ }
+
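[editor's note] The new select_cc fold rests on a sign-splat XOR identity; a 32-bit scalar check (assuming arithmetic right shift of signed values, as SRA provides; C is an arbitrary constant):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Vals[] = {-9, -1, 0, 1, 7};
  const int32_t C = 0x00F0;
  for (int32_t X : Vals) {
    int32_t SignSplat = X >> 31;                     // 0 for X >= 0, -1 for X < 0
    assert((X > -1 ? C : ~C) == (SignSplat ^ C));    // select_cc setgt X, -1, C, ~C
    assert((X < 0 ? C : ~C) == (SignSplat ^ ~C));    // select_cc setlt X, 0, C, ~C
  }
  return 0;
}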
return SDValue();
}
@@ -22750,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (C->isNullValue())
+ if (C->isZero())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -22795,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
-/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
@@ -22806,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22945,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22997,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
@@ -23157,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
- if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
@@ -23175,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
- if (!isAlias(N, C.getNode())) {
+ if (!mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4ca731cfdf62..4d1449bc2751 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -75,6 +75,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
@@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() {
EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
: FuncInfo.MBB->rend();
MachineBasicBlock::reverse_iterator RI(LastLocalValue);
- for (; RI != RE;) {
- MachineInstr &LocalMI = *RI;
- // Increment before erasing what it points to.
- ++RI;
+ for (MachineInstr &LocalMI :
+ llvm::make_early_inc_range(llvm::make_range(RI, RE))) {
Register DefReg = findLocalRegDef(LocalMI);
if (!DefReg)
continue;
@@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
const CallInst *CI, unsigned StartIdx) {
- for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {
Value *Val = CI->getArgOperand(i);
// Check for constants and encode them with a StackMaps::ConstantOp prefix.
if (const auto *C = dyn_cast<ConstantInt>(Val)) {
@@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// This includes all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ assert(I->arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
@@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) {
CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
.setTailCall(IsTailCall);
+ diagnoseDontCall(*CI);
+
return lowerCallTo(CLI);
}
@@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
// the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) {
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
auto *NewExpr =
@@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ // See if there's an expression to constant-fold.
+ DIExpression *Expr = DI->getExpression();
+ if (Expr)
+ std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
@@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs.
- if (TM.Options.ValueTrackingVariableLocations) {
+ if (FuncInfo.MF->useDebugInstrRef()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
}
@@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I->getAAMetadata();
if (!Alignment) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlign(ValTy);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 348fad6daf8f..c1bb65409282 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps(
MIB.addFrameIndex(Op.getFrameIx());
break;
case SDDbgOperand::VREG:
- MIB.addReg(Op.getVReg(), RegState::Debug);
+ MIB.addReg(Op.getVReg());
break;
case SDDbgOperand::SDNODE: {
SDValue V = SDValue(Op.getSDNode(), Op.getResNo());
@@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
DebugLoc DL = SD->getDebugLoc();
auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
MIB.addReg(0U);
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
return &*MIB;
@@ -872,22 +872,33 @@ MachineInstr *
InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
assert(SD->getLocationOps().size() == 1 &&
"Non variadic dbg_value should have only one location op");
+ // See about constant-folding the expression.
+ // Copy the location operand in case we replace it.
+ SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]);
+ if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) {
+ const Value *V = LocationOps[0].getConst();
+ if (auto *C = dyn_cast<ConstantInt>(V)) {
+ std::tie(Expr, C) = Expr->constantFold(C);
+ LocationOps[0] = SDDbgOperand::fromConst(C);
+ }
+ }
+
// Emit non-variadic dbg_value nodes as DBG_VALUE.
// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr
auto MIB = BuildMI(*MF, DL, II);
- AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap);
+ AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);
if (SD->isIndirect())
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Var).addMetadata(Expr);
}
@@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;
+ EmitDebugInstrRefs = MF->useDebugInstrRef();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d92b23f56e4d..eb9d2286aeb4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::VP_SCATTER:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VP_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStoreSDNode>(Node)->getValue().getValueType());
+ break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_SEQ_FMUL:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(1).getValueType());
break;
@@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
- for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
- UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Vec.getNode()->uses()) {
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
if (ST->isIndexed() || ST->isTruncatingStore() ||
ST->getValue() != Vec)
@@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) {
? ISD::FCOS : ISD::FSIN;
SDValue Op0 = Node->getOperand(0);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : Op0.getNode()->uses()) {
if (User == Node)
continue;
// The other user might have been turned into sincos already.
@@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
// If CTPOP is legal, use it. Otherwise use shifts and xor.
SDValue Result;
- if (TLI.isOperationLegal(ISD::CTPOP, VT)) {
+ if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {
Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
} else {
Result = Op;
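Accepting a merely promoted CTPOP here is safe because parity is just the low bit of the population count. A standalone illustration (plain C++, not part of the patch):

  #include <cstdint>

  // Parity of a 32-bit value via popcount, the same reduction ExpandPARITY
  // performs when a (possibly promoted) CTPOP is available.
  unsigned parity32(uint32_t X) {
    return unsigned(__builtin_popcount(X)) & 1u;
  }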
@@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandABS(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
@@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
"Don't know how to expand this subtraction!");
- Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
- VT));
+ Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
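The getNOT/ADD pair above is ordinary two's-complement negation: a - b is computed as a + (~b + 1). A standalone check of the identity (illustrative only):

  #include <cstdint>

  uint32_t sub_via_not_add(uint32_t A, uint32_t B) {
    return A + (~B + 1u); // == A - B in modular arithmetic
  }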
@@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
EVT VT = Node->getValueType(0);
- assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))
- ->isNullValue() &&
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))->isZero() &&
"Unable to expand as libcall if it is not normal rounding");
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
@@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FROUND:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3553f9ec16c2..27f9cede1922 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
@@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+ return NewFence;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- APInt API = APInt::getAllOnesValue(Size);
+ APInt API = APInt::getAllOnes(Size);
API.clearBit(Size - 1);
SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// We actually deal with the partially-softened FP_TO_FP16 node too, which
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
- EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
+ EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ ? MVT::f16
+ : RVT;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 328e9430d635..1fa4d88fcb4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SMAX:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
- case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ false);
+ break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
- case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
- case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::SRA:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ false);
+ break;
+ case ISD::SRL:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ false);
+ break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
- case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+ case ISD::UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECREDUCE(N);
break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntRes_VP_REDUCE(N);
+ break;
+
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
@@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
+
+ case ISD::VP_AND:
+ case ISD::VP_OR:
+ case ISD::VP_XOR:
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ case ISD::VP_MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SDIV:
+ case ISD::VP_SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_UDIV:
+ case ISD::VP_UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_ASHR:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_LSHR:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ true);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
-// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
- SelectionDAG &DAG) {
- EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If any possible shift value won't fit in the prefered type, just use
- // something safe. It will be legalized when the shift is expanded.
- if (!ShiftVT.isVector() &&
- ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
- ShiftVT = MVT::i32;
- return ShiftVT;
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
SDValue V = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::FREEZE, SDLoc(N),
@@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
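Only the shift-amount type changes here; the underlying trick is unchanged: byte-swap in the promoted type, then shift right by DiffBits to drop the bytes that belong to the extension. A standalone worked example for i16 promoted to i32 (illustrative, using a compiler builtin):

  #include <cstdint>

  uint16_t bswap16_via_i32(uint16_t X) {
    uint32_t Wide = X;                          // e.g. 0x1234 -> 0x00001234
    uint32_t Swapped = __builtin_bswap32(Wide); // 0x34120000
    return uint16_t(Swapped >> 16);             // >> DiffBits (32 - 16) -> 0x3412
  }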
@@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+
+ // If the larger CTLZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) {
+ if (SDValue Result = TLI.expandCTLZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- SDLoc dl(N);
- EVT OVT = N->getValueType(0);
- EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
@@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If the larger CTPOP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to
+ // TargetLowering.
+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) {
+ if (SDValue Result = TLI.expandCTPOP(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
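Falling back to the zero-extend-and-count path is correct because widening with zeros adds no set bits. Standalone illustration (not LLVM code):

  #include <cstdint>

  unsigned popcount8_via_i32(uint8_t X) {
    return unsigned(__builtin_popcount(uint32_t(X))); // same count as for the i8 value
  }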
@@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
+
+ // If the larger CTTZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. Don't expand if we can use CTPOP or CTLZ expansion on the
+ // larger type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) &&
+ !TLI.isOperationLegal(ISD::CTPOP, NVT) &&
+ !TLI.isOperationLegal(ISD::CTLZ, NVT)) {
+ if (SDValue Result = TLI.expandCTTZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
if (N->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
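The comment truncated above refers to the usual promotion trick for CTTZ: OR in the bit just above the original width, so that a zero input still reports the original bit width. Standalone illustration for i8 promoted to i32 (not part of the patch):

  #include <cstdint>

  unsigned cttz8_via_i32(uint8_t X) {
    uint32_t Wide = uint32_t(X) | (1u << 8); // bit just off the top of the i8
    return unsigned(__builtin_ctz(Wide));    // 8 when X == 0, otherwise cttz(X)
  }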
@@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getOffset(), N->getMask(), ExtPassThru,
N->getMemoryVT(), N->getMemOperand(),
- N->getAddressingMode(), ISD::EXTLOAD);
+ N->getAddressingMode(), ExtType,
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
unsigned NewBits = PromotedType.getScalarSizeInBits();
if (Opcode == ISD::UADDSAT) {
- APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
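For UADDSAT the promoted sequence adds in the wide type and clamps against SatMax, the all-ones value of the original width. A standalone scalar equivalent (illustrative only):

  #include <algorithm>
  #include <cstdint>

  uint8_t uaddsat8(uint8_t A, uint8_t B) {
    uint32_t Sum = uint32_t(A) + uint32_t(B);       // cannot overflow in i32
    return uint8_t(std::min<uint32_t>(Sum, 0xFFu)); // clamp to the i8 all-ones
  }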
@@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// Shift cannot use a min/max expansion, we can't detect overflow if all of
// the bits have been shifted out.
- if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
@@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
Op.getValueType(), Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
@@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
@@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
+ EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
@@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntOp_VP_REDUCE(N, OpNo);
+ break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
@@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRH, we can avoid inserting real truncate
// instruction, which is redundant eventually.
- unsigned OpLEffectiveBits =
- OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
- unsigned OpREffectiveBits =
- OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL);
+ unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);
if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
@@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
-
SDValue DataOp = N->getValue();
- EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- SDLoc dl(N);
- bool TruncateStore = false;
if (OpNo == 4) {
+ // The Mask. Update in place.
+ EVT DataVT = DataOp.getValueType();
Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- } else { // Data operand
- assert(OpNo == 1 && "Unexpected operand for promotion");
- DataOp = GetPromotedInteger(DataOp);
- TruncateStore = true;
}
- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+
+ return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
- TruncateStore, N->isCompressingStore());
+ /*IsTruncating*/ true, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
@@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
return SDValue();
}
-SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
- SDLoc dl(N);
- SDValue Op;
+static unsigned getExtendForIntVecReduction(SDNode *N) {
switch (N->getOpcode()) {
- default: llvm_unreachable("Expected integer vector reduction");
+ default:
+ llvm_unreachable("Expected integer vector reduction");
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
- Op = GetPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ return ISD::ANY_EXTEND;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
- Op = SExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ return ISD::SIGN_EXTEND;
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
- Op = ZExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ return ISD::ZERO_EXTEND;
}
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) {
+ switch (getExtendForIntVecReduction(N)) {
+ default:
+ llvm_unreachable("Impossible extension kind for integer reduction");
+ case ISD::ANY_EXTEND:
+ return GetPromotedInteger(V);
+ case ISD::SIGN_EXTEND:
+ return SExtPromotedInteger(V);
+ case ISD::ZERO_EXTEND:
+ return ZExtPromotedInteger(V);
+ }
+}
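The extension kind matters for the min/max reductions: ADD/MUL/AND/OR/XOR tolerate any extension, but SMAX/SMIN need sign extension and UMAX/UMIN need zero extension, or the comparison changes meaning. A standalone example of the signed case (illustrative only):

  #include <algorithm>
  #include <cstdint>

  int8_t smax_pair(int8_t A, int8_t B) {
    // Sign-extend, compare, truncate. For A = -1, B = 3 this yields 3;
    // zero-extension would compare 255 with 3 and wrongly keep -1.
    return int8_t(std::max<int32_t>(int32_t(A), int32_t(B)));
  }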
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
EVT EltVT = Op.getValueType().getVectorElementType();
EVT VT = N->getValueType(0);
+
if (VT.bitsGE(EltVT))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
@@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(OpNo);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) { // Mask
+ // Update in place.
+ NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType());
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+
+ Op = PromoteIntOpVectorReduction(N, Op);
+
+ NewOps[OpNo] = Op;
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = Op.getValueType().getScalarType();
+
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps);
+
+ // Result size must be >= element/start-value size. If this is not the case
+ // after promotion, also promote both the start value and result type and
+ // then truncate.
+ NewOps[0] =
+ DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0));
+ SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue Op = ZExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
@@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to expand the result of this "
"operator!");
+ case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
- EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
@@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
@@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
- // The type from TLI is too small to fit the shift amount we want.
- // Override it with i32. The shift will have to be legalized.
- ShiftAmtTy = MVT::i32;
- }
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
@@ -3641,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
// Saturate to signed maximum.
APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
- APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnes(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
// Saturate to signed minimum.
@@ -3811,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- assert(ShiftTy.getScalarSizeInBits() >=
- Log2_32_Ceil(VT.getScalarSizeInBits()) &&
- "ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
@@ -3860,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ EVT ShAmtTy =
+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
+ SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);
+ SDValue Ops[2] = {N->getOperand(0), ShAmt};
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
@@ -4038,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
LC = RTLIB::MULO_I64;
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // FIXME: This is not an optimal expansion, but better than crashing.
+ EVT WideVT =
+ EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ SDValue MulLo, MulHi;
+ SplitInteger(Mul, MulLo, MulHi);
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VT, MulLo,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+ SDValue Overflow =
+ DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);
+ SplitInteger(MulLo, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
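The fallback widens, multiplies, and declares overflow when the high half is not simply the sign of the low half. The same check written for a signed 32-bit multiply done in 64 bits (standalone sketch, illustrative only):

  #include <cstdint>

  bool smul32_overflow(int32_t A, int32_t B, int32_t &Res) {
    int64_t Wide = int64_t(A) * int64_t(B);
    Res = int32_t(Wide);
    int32_t Hi = int32_t(uint64_t(Wide) >> 32);
    return Hi != (Res < 0 ? -1 : 0); // high half must be the replicated sign bit
  }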
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
@@ -4191,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- // Lower the rotate to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ // Delegate to funnel-shift expansion.
+ SDLoc DL(N);
+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;
+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),
+ N->getOperand(0), N->getOperand(1));
SplitInteger(Res, Lo, Hi);
}
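A rotate is a funnel shift with both inputs equal, so delegating ROTL/ROTR to FSHL/FSHR of the same operand is an exact rewrite. A standalone reference for what either form computes (illustrative only):

  #include <cstdint>

  uint32_t rotl32(uint32_t X, unsigned S) {
    S %= 32;
    return S ? (X << S) | (X >> (32 - S)) : X; // == fshl(X, X, S)
  }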
-void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- // Lower the funnel shift to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandFunnelShift(N, Res, DAG);
- SplitInteger(Res, Lo, Hi);
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+
+ SDLoc DL(N);
+ unsigned Opc = N->getOpcode();
+ SDValue ShAmt = N->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);
+
+ // If the shift amount is at least half the bitwidth, swap the inputs.
+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();
+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,
+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));
+ SDValue Cond =
+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),
+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);
+
+ // Expand to a pair of funnel shifts.
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);
+
+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);
+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);
+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);
+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);
+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
@@ -4300,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
- if (RHSCST->isAllOnesValue()) {
+ if (RHSCST->isAllOnes()) {
// Equality comparison to -1.
NewLHS = DAG.getNode(ISD::AND, dl,
LHSLo.getValueType(), LHSLo, LHSHi);
@@ -4320,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// If this is a comparison of the sign bit, just look at the top part.
// X > -1, x < 0
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
- if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
- (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1
NewLHS = LHSHi;
NewRHS = RHSHi;
return;
@@ -4372,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
- if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
- (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
- (LoCmpC && LoCmpC->isNullValue())))) {
+ // FIXME: Is the HiCmpC->isOne() here correct for
+ // ZeroOrNegativeOneBooleanContent?
+ if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) ||
+ (!EqAllowed &&
+ ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {
// For LE / GE, if high part is known false, ignore the low part.
// For LT / GT: if low part is known false, return the high part.
// if high part is known true, ignore the low part.
@@ -4709,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
+ // Try and extract from a smaller type so that it eventually falls
+ // into the promotion code below.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||
+ getTypeAction(InVT) == TargetLowering::TypeLegal) {
+ EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NElts = NInVT.getVectorMinNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();
+
+ SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,
+ DAG.getConstant(alignDown(IdxVal, NElts), dl,
+ BaseIdx.getValueType()));
+ SDValue Step2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,
+ DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);
+ }
+
+ // Try and extract from a widened type.
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx};
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+
// Promote operands and see if this is handled by target lowering,
// Otherwise, use the BUILD_VECTOR approach below
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
@@ -4876,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumOperands = N->getNumOperands();
+ unsigned NumOutElem = NOutVT.getVectorMinNumElements();
EVT OutElemTy = NOutVT.getVectorElementType();
+ if (OutVT.isScalableVector()) {
+ // Find the largest promoted element type for each of the operands.
+ SDUse *MaxSizedValue = std::max_element(
+ N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) {
+ EVT AVT = A.getValueType().getVectorElementType();
+ EVT BVT = B.getValueType().getVectorElementType();
+ return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits();
+ });
+ EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType();
+
+ // Then promote all vectors to the largest element type.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ SDValue Op = N->getOperand(I);
+ EVT OpVT = Op.getValueType();
+ if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ else
+ assert(getTypeAction(OpVT) == TargetLowering::TypeLegal &&
+ "Unhandled legalization type");
+
+ if (OpVT.getVectorElementType().getScalarSizeInBits() <
+ MaxElementVT.getScalarSizeInBits())
+ Op = DAG.getAnyExtOrTrunc(Op, dl,
+ OpVT.changeVectorElementType(MaxElementVT));
+ Ops.push_back(Op);
+ }
+
+ // Do the CONCAT on the promoted type and finally truncate to (the promoted)
+ // NOutVT.
+ return DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ OutVT.changeVectorElementType(MaxElementVT), Ops),
+ dl, NOutVT);
+ }
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
- unsigned NumOutElem = NOutVT.getVectorNumElements();
- unsigned NumOperands = N->getNumOperands();
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
@@ -4960,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
// we can simply change the result type.
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {
+ // The VP_REDUCE result size may be larger than the element size, so we can
+ // simply change the result type. However the start value and result must be
+ // the same.
+ SDLoc DL(N);
+ SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start,
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -4977,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // The result type is equal to the first input operand's type, so the
+ // type that needs promoting must be the second source vector.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ SDValue Idx = N->getOperand(2);
+ EVT PromVT = EVT::getVectorVT(*DAG.getContext(),
+ V1.getValueType().getVectorElementType(),
+ V0.getValueType().getVectorElementCount());
+ V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT);
+ SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx);
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 05a974af3b55..1f73c9eea104 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() {
#endif
PerformExpensiveChecks();
- SDNode *N = Worklist.back();
- Worklist.pop_back();
+ SDNode *N = Worklist.pop_back_val();
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8d17d8fc68b1..da282ecad282 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -289,6 +289,12 @@ private:
return DAG.getZeroExtendInReg(Op, DL, OldVT);
}
+ // Promote the given operand V (vector or scalar) according to N's specific
+ // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*, and the
+ // operand is extended with the matching ISD::(ANY|ZERO|SIGN)_EXTEND kind.
+ // Returns the promoted value.
+ SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V);
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -332,14 +338,14 @@ private:
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
- SDValue PromoteIntRes_SHL(SDNode *N);
- SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
- SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
- SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);
SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
- SDValue PromoteIntRes_SRA(SDNode *N);
- SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
@@ -353,6 +359,7 @@ private:
SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
@@ -369,6 +376,7 @@ private:
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
@@ -394,6 +402,7 @@ private:
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+ SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -518,6 +527,7 @@ private:
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N);
@@ -816,7 +826,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
- void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -898,6 +908,7 @@ private:
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
@@ -912,7 +923,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
- SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
@@ -972,10 +983,10 @@ private:
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper function to generate a set of stores to store a widen vector into
- /// non-widen memory.
+ /// non-widen memory. Returns true if successful, false otherwise.
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
- void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+ bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
@@ -1011,6 +1022,7 @@ private:
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
+ void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 81cc2bf10d25..3d3c9a2ad837 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
}
+
+void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc DL(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ebe3bfc4b75a..88a28a3be53e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return RecursivelyLegalizeResults(Op, ResultVals);
}
-// FIXME: This is very similar to the X86 override of
-// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
+// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
+// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
@@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandBITREVERSE(Node, Results);
return;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
- Mask = DAG.getSelect(DL, BitTy, Mask,
- DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
- BitTy),
- DAG.getConstant(0, DL, BitTy));
+ Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
+ DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all one or all zero.
if (VT.isFixedLengthVector())
@@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
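The surrounding expansion builds a branchless select: once the mask is broadcast to all-ones or all-zeros, the result is (Op1 & Mask) | (Op2 & ~Mask), which the getNOT cleanup above leaves unchanged. Standalone scalar equivalent (illustrative only):

  #include <cstdint>

  uint32_t select_by_mask(uint32_t Mask, uint32_t A, uint32_t B) {
    return (A & Mask) | (B & ~Mask); // Mask is either ~0u or 0u
  }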
@@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
EVT VT = Node->getValueType(0);
+ // Scalable vectors can't use shuffle expansion.
+ if (VT.isScalableVector())
+ return TLI.expandBSWAP(Node, DAG);
+
// Generate a byte wise shuffle mask for the BSWAP.
SmallVector<int, 16> ShuffleMask;
createBSWAPShuffleMask(VT, ShuffleMask);
EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
// Only emit a shuffle if the mask is legal.
- if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
- return DAG.UnrollVectorOp(Node);
+ if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
- SDLoc DL(Node);
- SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return TLI.expandBSWAP(Node, DAG);
+
+ // Otherwise unroll.
+ return DAG.UnrollVectorOp(Node);
}
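When the shuffle mask is not legal but shifts/AND/OR are, expandBSWAP emits a shift-based swap rather than unrolling. Per 32-bit lane that is the classic pattern (standalone, illustrative only):

  #include <cstdint>

  uint32_t bswap32_shifts(uint32_t X) {
    return (X << 24) | ((X << 8) & 0x00FF0000u) |
           ((X >> 8) & 0x0000FF00u) | (X >> 24);
  }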
void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
+ // We can't unroll or use shuffles for scalable vectors.
+ if (VT.isScalableVector()) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
+ return;
+ }
+
// If we have the scalar operation, it's probably cheaper to unroll it.
if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- // Let LegalizeDAG handle this later.
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
return;
+ }
// Otherwise unroll.
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, VT);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
@@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
if (Node->getOpcode() == ISD::STRICT_FSETCC ||
Node->getOpcode() == ISD::STRICT_FSETCCS)
ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
- DAG.getConstant(0, dl, EltVT));
+ DAG.getAllOnesConstant(dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
OpValues.push_back(ScalarResult);
OpChains.push_back(ScalarChain);
@@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 91242bbf866f..539c9cb9c256 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue Arg = N->getOperand(2).getOperand(0);
if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
- unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
return GetScalarizedVector(N->getOperand(Op));
}
@@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- SplitVecRes_BinOp(N, Lo, Hi);
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);
break;
case ISD::FMA:
case ISD::FSHL:
@@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
}
}
-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
+ bool IsVP) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ if (!IsVP) {
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ return;
+ }
+
+ // Split the mask.
+ SDValue MaskLo, MaskHi;
+ SDValue Mask = N->getOperand(2);
+ EVT MaskVT = Mask.getValueType();
+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
+
+ // Split the vector length parameter.
+ // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl, %halfnumelts).
+ SDValue EVL = N->getOperand(3);
+ EVT VecVT = N->getValueType(0);
+ EVT EVLVT = EVL.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+ "Expecting the mask to be an evenly-sized vector");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
+ : DAG.getVScale(dl, EVLVT,
+ APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
+ SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
+
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
+ {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
+ {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
}
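The %evl split above is plain saturating arithmetic. A standalone sketch (ordinary C++ with fixed integers rather than SDValues; the half-vector size of 8 is just an assumed example):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  // Splitting the explicit vector length of a 16-element VP op into the
  // lengths for its two 8-element halves: Lo = umin, Hi = usubsat.
  const uint64_t HalfNumElts = 8;
  for (uint64_t EVL : {3, 8, 13, 16}) {
    uint64_t EVLLo = std::min(EVL, HalfNumElts);
    uint64_t EVLHi = EVL > HalfNumElts ? EVL - HalfNumElts : 0;
    std::printf("EVL=%2llu -> Lo=%llu Hi=%llu\n", (unsigned long long)EVL,
                (unsigned long long)EVLLo, (unsigned long long)EVLHi);
  }
}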
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = WidenVecRes_INSERT_SUBVECTOR(N);
+ break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
@@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- Res = WidenVecRes_Binary(N);
+ Res = WidenVecRes_Binary(N, /*IsVP*/ false);
break;
case ISD::FADD:
@@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ // Vector-predicated binary op widening. Note that -- unlike the
+ // unpredicated versions -- we don't have to worry about trapping on
+ // operations like UDIV, FADD, etc., as we pass on the original vector
+ // length parameter. This means the widened elements containing garbage
+ // aren't active.
+ Res = WidenVecRes_Binary(N, /*IsVP*/ true);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
+ N->getFlags());
+ // For VP operations, we must also widen the mask. Note that the mask type
+ // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle the
+ // case where the mask needs widening to an identically-sized type as the
+ // vector inputs.
+ SDValue Mask = N->getOperand(2);
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Unable to widen binary VP op");
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
@@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
EVT InVT = InOp.getValueType();
@@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
- unsigned InVTNumElts = InVT.getVectorNumElements();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
+ ElementCount InVTEC = InVT.getVectorElementCount();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts) {
+ InVTEC = InVT.getVectorElementCount();
+ if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
@@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
// Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
+ unsigned NumConcat =
+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
@@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
- if (InVTNumElts % WidenNumElts == 0) {
+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shorten input vector.
@@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(WidenVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ auto InOpTypeAction = getTypeAction(InOp.getValueType());
+ if (InOpTypeAction == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
@@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
- if (VT.isScalableVector())
- report_fatal_error("Don't know how to widen the result of "
- "EXTRACT_SUBVECTOR for scalable vectors");
-
// Check if we can extract from the vector.
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
- unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned InNumElts = InVT.getVectorMinNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ if (VT.isScalableVector()) {
+ // Try to split the operation up into smaller extracts and concat the
+ // results together, e.g.
+ // nxv6i64 extract_subvector(nxv12i64, 6)
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv12i64, 6)
+ // nxv2i64 extract_subvector(nxv12i64, 8)
+ // nxv2i64 extract_subvector(nxv12i64, 10)
+ // undef)
+ unsigned VTNElts = VT.getVectorMinNumElements();
+ unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ // Avoid recursion around e.g. nxv1i8.
+ if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
+ SmallVector<SDValue> Parts;
+ unsigned I = 0;
+ for (; I < VTNElts / GCD; ++I)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
+ for (; I < WidenNumElts / GCD; ++I)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ report_fatal_error("Don't know how to widen the result of "
+ "EXTRACT_SUBVECTOR for scalable vectors");
+ }
+
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i = 0; i < NumElts; ++i)
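The index arithmetic behind the scalable EXTRACT_SUBVECTOR breakdown above can be checked in isolation. A minimal sketch (plain C++; the nxv6i64-from-nxv12i64 shapes follow the comment in the code, everything else is assumed for illustration):

#include <cstdio>
#include <numeric>

int main() {
  // Widening nxv6i64 extract_subvector(nxv12i64, 6) to nxv8i64: break the
  // extract into GCD-sized pieces and pad the rest with undef.
  const unsigned VTNElts = 6, WidenNumElts = 8, IdxVal = 6;
  const unsigned GCD = std::gcd(VTNElts, WidenNumElts); // 2
  unsigned I = 0;
  for (; I < VTNElts / GCD; ++I)
    std::printf("part %u: nxv%ui64 extract_subvector at index %u\n", I, GCD,
                IdxVal + I * GCD); // indices 6, 8, 10
  for (; I < WidenNumElts / GCD; ++I)
    std::printf("part %u: undef\n", I); // one undef piece to reach nxv8i64
}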
@@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
else
Result = GenWidenVectorLoads(LdChain, LD);
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- return Result;
+ return Result;
+ }
+
+ // Generate a vector-predicated load if it is custom/legal on the target. To
+ // avoid possible recursion, only do this if the widened mask type is legal.
+ // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ EVT LdVT = LD->getMemoryVT();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ SDLoc DL(N);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = LdVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = LD->getMemOperand();
+ SDValue NewLoad =
+ DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
+ MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),
+ MMO->getAAInfo());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+
+ report_fatal_error("Unable to widen vector load");
}
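The EVL chosen for the widened VP load above is the original (scalable) element count, so the lanes introduced by widening stay inactive. A standalone arithmetic sketch (plain C++; the vscale values and the nxv3i32-widened-to-nxv4i32 shape are assumptions for illustration):

#include <cstdio>
#include <initializer_list>

int main() {
  // Widening an nxv3i32 load to nxv4i32 with a VP_LOAD: EVL = vscale * 3,
  // so for any runtime vscale only the original 3*vscale lanes are read.
  const unsigned OrigMinElts = 3, WideMinElts = 4;
  for (unsigned VScale : {1u, 2u, 4u}) {
    unsigned EVL = VScale * OrigMinElts;
    unsigned WideLanes = VScale * WideMinElts;
    std::printf("vscale=%u: %u of %u widened lanes active\n", VScale, EVL,
                WideLanes);
  }
}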
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
@@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
@@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
}
EVT CondEltVT = CondVT.getVectorElementType();
- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
- CondEltVT, WidenNumElts);
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
@@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return TLI.scalarizeVectorStore(ST, DAG);
SmallVector<SDValue, 16> StChain;
- GenWidenVectorStores(StChain, ST);
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
- if (StChain.size() == 1)
- return StChain[0];
- else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
+ // Generate a vector-predicated store if it is custom/legal on the target.
+ // To avoid possible recursion, only do this if the widened mask type is
+ // legal.
+ // FIXME: Not all targets may support EVL in VP_STORE. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ SDValue StVal = ST->getValue();
+ EVT StVT = StVal.getValueType();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ // Widen the value.
+ SDLoc DL(N);
+ StVal = GetWidenedVector(StVal);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = StVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = ST->getMemOperand();
+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
+ EVL, MMO->getPointerInfo(), MMO->getAlign(),
+ MMO->getFlags(), MMO->getAAInfo());
+ }
+
+ report_fatal_error("Unable to widen vector store");
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
@@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// Align: If 0, don't allow use of a wider type
// WidenEx: If Align is not 0, the amount additional we can load/store from.
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
- unsigned Width, EVT WidenVT,
- unsigned Align = 0, unsigned WidenEx = 0) {
+static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
const bool Scalable = WidenVT.isScalableVector();
unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
@@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
}
}
+ // Using element-wise loads and stores for widening operations is not
+ // supported for scalable vectors.
if (Scalable)
- report_fatal_error("Using element-wise loads and stores for widening "
- "operations is not supported for scalable vectors");
+ return None;
+
return RetVT;
}
@@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
- unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
+ unsigned LdAlign =
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
- EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- TypeSize NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ Optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
+
+ if (!FirstVT)
+ return SDValue();
+
+ SmallVector<EVT, 8> MemVTs;
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ // Unless we're able to load in one instruction we must work out how to load
+ // the remainder.
+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
+ Optional<EVT> NewVT = FirstVT;
+ TypeSize RemainingWidth = LdWidth;
+ TypeSize NewVTWidth = FirstVTWidth;
+ do {
+ RemainingWidth -= NewVTWidth;
+ if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
+ LdAlign, WidthDiff.getKnownMinSize());
+ if (!NewVT)
+ return SDValue();
+ NewVTWidth = NewVT->getSizeInBits();
+ }
+ MemVTs.push_back(*NewVT);
+ } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
+ }
+
+ SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
- if (!NewVT.isVector()) {
- unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ if (MemVTs.empty()) {
+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+ if (!FirstVT->isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
- if (NewVT == WidenVT)
+ if (FirstVT == WidenVT)
return LdOp;
// TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
- unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
+ assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
+ unsigned NumConcat =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
@@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
uint64_t ScaledOffset = 0;
MachinePointerInfo MPI = LD->getPointerInfo();
- do {
- LdWidth -= NewVTWidth;
- IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
- &ScaledOffset);
-
- if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
- // The current type we are using is too large. Find a better size.
- NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- NewVTWidth = NewVT.getSizeInBits();
- }
+ // First increment past the first load.
+ IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
+ &ScaledOffset);
+
+ for (EVT MemVT : MemVTs) {
Align NewAlign = ScaledOffset == 0
? LD->getOriginalAlign()
: commonAlignment(LD->getAlign(), ScaledOffset);
SDValue L =
- DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
+ DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
LdOps.push_back(L);
- LdOp = L;
- } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
+ LdChain.push_back(L.getValue(1));
+ IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
+ }
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
return DAG.getBuildVector(WidenVT, dl, Ops);
}
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same
@@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MachinePointerInfo MPI = ST->getPointerInfo();
uint64_t ScaledOffset = 0;
+
+ // A breakdown of how to widen this vector store. Each element of the
+ // breakdown is a memory VT combined with the number of times it is to be
+ // stored, e.g., v5i32 -> {{v2i32,2},{i32,1}}
+ SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;
+
while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ Optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ if (!NewVT)
+ return false;
+ MemVTs.push_back({*NewVT, 0});
+ TypeSize NewVTWidth = NewVT->getSizeInBits();
+
+ do {
+ StWidth -= NewVTWidth;
+ MemVTs.back().second++;
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ }
+
+ for (const auto &Pair : MemVTs) {
+ EVT NewVT = Pair.first;
+ unsigned Count = Pair.second;
TypeSize NewVTWidth = NewVT.getSizeInBits();
if (NewVT.isVector()) {
@@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
Idx += NumVTElts;
-
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
&ScaledOffset);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
} else {
// Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
@@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
}
}
+
+ return true;
}
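The {MemVT, count} breakdown built above is a greedy decomposition of the store width. A standalone sketch of the same loop structure (plain C++ over bit widths; the candidate widths 64 and 32 stand in for v2i32 and i32 and are only an assumption for this sketch):

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Greedy breakdown of a v5i32 store (160 bits) into {width, count} pairs,
  // mirroring the MemVTs vector above.
  const unsigned Candidates[] = {64, 32}; // largest first, in bits
  unsigned Remaining = 160;
  std::vector<std::pair<unsigned, unsigned>> Breakdown;
  while (Remaining != 0) {
    unsigned W = 0;
    for (unsigned C : Candidates)
      if (C <= Remaining) {
        W = C;
        break;
      }
    if (W == 0)
      break; // no usable width; mirrors the 'return false' path above
    unsigned Count = 0;
    do {
      Remaining -= W;
      ++Count;
    } while (Remaining != 0 && Remaining >= W);
    Breakdown.push_back({W, Count});
  }
  for (const auto &P : Breakdown)
    std::printf("{%u bits, %u}\n", P.first, P.second); // {64,2} then {32,1}
}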
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 75b4242a415c..f64b332a7fef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -192,7 +192,7 @@ public:
// Returns the SDNodes which this SDDbgValue depends on.
SmallVector<SDNode *> getSDNodes() const {
SmallVector<SDNode *> Dependencies;
- for (SDDbgOperand DbgOp : getLocationOps())
+ for (const SDDbgOperand &DbgOp : getLocationOps())
if (DbgOp.getKind() == SDDbgOperand::SDNODE)
Dependencies.push_back(DbgOp.getSDNode());
for (SDNode *Node : getAdditionalDependencies())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0022e5ec31f0..1b89864116cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -56,9 +56,7 @@ namespace {
SUnit *pop() {
if (empty()) return nullptr;
- SUnit *V = Queue.back();
- Queue.pop_back();
- return V;
+ return Queue.pop_back_val();
}
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b2a8c8bdd78c..95f7e43b151d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// There are either zero or one users of the Glue result.
bool HasGlueUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI)
- if (GlueVal.isOperandOf(*UI)) {
+ for (SDNode *U : N->uses())
+ if (GlueVal.isOperandOf(U)) {
HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
- N = *UI;
+ N = U;
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
NodeSUnit->isCall = true;
break;
@@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
/// Returns true if \p DV has any VReg operand locations which don't exist in
/// VRBaseMap.
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
- for (SDDbgOperand L : DV->getLocationOps()) {
+ for (const SDDbgOperand &L : DV->getLocationOps()) {
if (L.getKind() == SDDbgOperand::SDNODE &&
VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a98464425c4..008665d50233 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) {
}
}
+bool ISD::isVPBinaryOp(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+bool ISD::isVPReduction(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
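The isVPBinaryOp / isVPReduction predicates above are generated from VPIntrinsics.def via an X-macro. A self-contained toy version of that pattern (the opcode names and the TOY_VP_BINARYOPS table here are invented purely for illustration, not part of LLVM):

#include <cstdio>

enum Opcode { OP_VP_ADD, OP_VP_MUL, OP_VP_LOAD, OP_OTHER };

// The real file defines PROPERTY_VP_BINARYOP_SDNODE(SDOPC) entries; this toy
// table plays the same role.
#define TOY_VP_BINARYOPS(X) X(OP_VP_ADD) X(OP_VP_MUL)

static bool isToyVPBinaryOp(Opcode Opc) {
  switch (Opc) {
  default:
    return false;
#define CASE(OPC)                                                              \
  case OPC:                                                                    \
    return true;
    TOY_VP_BINARYOPS(CASE)
#undef CASE
  }
}

int main() {
  std::printf("%d %d\n", isToyVPBinaryOp(OP_VP_ADD),
              isToyVPBinaryOp(OP_VP_LOAD)); // 1 0
}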
/// The operand position of the vector mask.
Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
@@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::VP_LOAD: {
+ const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
+ ID.AddInteger(ELD->getMemoryVT().getRawBits());
+ ID.AddInteger(ELD->getRawSubclassData());
+ ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_STORE: {
+ const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
+ ID.AddInteger(EST->getMemoryVT().getRawBits());
+ ID.AddInteger(EST->getRawSubclassData());
+ ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_GATHER: {
+ const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
+ ID.AddInteger(EG->getMemoryVT().getRawBits());
+ ID.AddInteger(EG->getRawSubclassData());
+ ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_SCATTER: {
+ const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
+ ID.AddInteger(ES->getMemoryVT().getRawBits());
+ ID.AddInteger(ES->getRawSubclassData());
+ ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::MLOAD: {
const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
ID.AddInteger(MLD->getMemoryVT().getRawBits());
@@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue NegOne =
- getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
- return getNode(ISD::XOR, DL, VT, Val, NegOne);
+ return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
}
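The rewrite of getNOT above relies on the usual identity that bitwise NOT equals XOR with an all-ones value of the same width; a one-line standalone check:

#include <cassert>
#include <cstdint>

int main() {
  // ~x == x ^ all-ones, for any fixed-width integer x.
  uint32_t X = 0x12345678u;
  assert((X ^ 0xFFFFFFFFu) == uint32_t(~X));
}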
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
@@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isNullValue())
+ if (C->isZero())
return N1;
}
@@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
- switch (Cond) {
- default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
- case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
- case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
- case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
- case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
- case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
- case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
- case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
- case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
- case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
- }
+ return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)),
+ dl, VT, OpVT);
}
}
@@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
return SDValue();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return GetDemandedBits(V, DemandedBits, DemandedElts);
}
@@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
switch (V.getOpcode()) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
- ? APInt::getAllOnesValue(DemandedElts.getBitWidth())
+ ? APInt::getAllOnes(DemandedElts.getBitWidth())
: APInt(DemandedElts.getBitWidth(), 0);
return true;
case ISD::ADD:
@@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
switch (V.getOpcode()) {
case ISD::BUILD_VECTOR: {
@@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
// For now we don't support this with scalable vectors.
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
@@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
APInt DemandedElts;
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (isSplatValue(V, DemandedElts, UndefElts)) {
if (VT.isScalableVector()) {
@@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
}
@@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Known.One.setAllBits();
Known.Zero.setAllBits();
@@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// bits from the overlapping larger input elements and extracting the
// sub sections we actually care about.
unsigned SubScale = SubBitWidth / BitWidth;
- APInt SubDemandedElts(NumElts / SubScale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SubDemandedElts.setBit(i / SubScale);
-
+ APInt SubDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
@@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
}))
return true;
+ // Is the operand of a splat vector a constant power of two?
+ if (Val.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))
+ if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
@@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return 1;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
@@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(VT.isVector() && "Expected bitcast to vector");
unsigned Scale = SrcBits / VTBits;
- APInt SrcDemandedElts(NumElts / Scale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
+ APInt SrcDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);
// Fast case - sign splat can be simply split across the small elements.
Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
@@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS =
isConstOrConstSplat(Op.getOperand(1), DemandedElts))
- if (CRHS->isAllOnesValue()) {
+ if (CRHS->isAllOnes()) {
KnownBits Known =
computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS =
isConstOrConstSplat(Op.getOperand(0), DemandedElts))
- if (CLHS->isNullValue()) {
+ if (CLHS->isZero()) {
KnownBits Known =
computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedSubElts) {
@@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(FirstAnswer, Mask.countLeadingOnes());
}
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
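The two ComputeMinSignedBits overloads above are just the complement of ComputeNumSignBits. A worked standalone check of the identity (plain integers; the sign-bit count of 25 is an assumed example for an i32 value known to lie in [-128, 127]):

#include <cassert>

int main() {
  // MinSignedBits = BitWidth - NumSignBits + 1.
  const unsigned BitWidth = 32;
  const unsigned NumSignBits = 25; // at least 25 copies of the sign bit
  assert(BitWidth - NumSignBits + 1 == 8); // the value fits in an i8
}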
bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
unsigned Depth) const {
// Early out for FREEZE.
@@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
return false;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
}
@@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::UNDEF:
return PoisonOnly;
- // TODO: ISD::BUILD_VECTOR handling
+ case ISD::BUILD_VECTOR:
+ // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -
+ // this shouldn't affect the result.
+ for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly,
+ Depth + 1))
+ return false;
+ }
+ return true;
// TODO: Search for noundef attributes from library functions.
@@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
"Floating point types unsupported - use isKnownNeverZeroFloat");
// If the value is a constant, we can obviously see if it is a zero or not.
- if (ISD::matchUnaryPredicate(
- Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ if (ISD::matchUnaryPredicate(Op,
+ [](ConstantSDNode *C) { return !C->isZero(); }))
return true;
// TODO: Recognize more cases here.
@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
SelectionDAG &DAG) {
- if (cast<ConstantSDNode>(Step)->isNullValue())
+ if (cast<ConstantSDNode>(Step)->isZero())
return DAG.getConstant(0, DL, VT);
return SDValue();
@@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
+ APInt::getZero(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = {Operand};
- if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
@@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+ return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
return SDValue();
- // For now, the array Ops should only contain two values.
- // This enforcement will be removed once this function is merged with
- // FoldConstantVectorArithmetic
- if (Ops.size() != 2)
+ unsigned NumOps = Ops.size();
+ if (NumOps == 0)
return SDValue();
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
- SDNode *N1 = Ops[0].getNode();
- SDNode *N2 = Ops[1].getNode();
-
// Handle the case of two scalars.
- if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
- if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
- if (C1->isOpaque() || C2->isOpaque())
- return SDValue();
-
- Optional<APInt> FoldAttempt =
- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
- if (!FoldAttempt)
- return SDValue();
-
- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
- assert((!Folded || !VT.isVector()) &&
- "Can't fold vectors ops with scalar operands");
- return Folded;
- }
- }
-
- // fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
- return FoldSymbolOffset(Opcode, VT, GA, N2);
- if (TLI->isCommutativeBinOp(Opcode))
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
- return FoldSymbolOffset(Opcode, VT, GA, N1);
-
- // For fixed width vectors, extract each constant element and fold them
- // individually. Either input may be an undef value.
- bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
- N1->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV1 && !N1->isUndef())
- return SDValue();
- bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV2 && !N2->isUndef())
- return SDValue();
- // If both operands are undef, that's handled the same way as scalars.
- if (!IsBVOrSV1 && !IsBVOrSV2)
- return SDValue();
-
- EVT SVT = VT.getScalarType();
- EVT LegalSVT = SVT;
- if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
- if (LegalSVT.bitsLT(SVT))
- return SDValue();
- }
-
- SmallVector<SDValue, 4> Outputs;
- unsigned NumOps = 0;
- if (IsBVOrSV1)
- NumOps = std::max(NumOps, N1->getNumOperands());
- if (IsBVOrSV2)
- NumOps = std::max(NumOps, N2->getNumOperands());
- assert(NumOps != 0 && "Expected non-zero operands");
- // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
- // one iteration for that.
- assert((!VT.isScalableVector() || NumOps == 1) &&
- "Scalable vector should only have one scalar");
-
- for (unsigned I = 0; I != NumOps; ++I) {
- // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
- // to use operand 0 of the SPLAT_VECTOR for each fixed element.
- SDValue V1;
- if (N1->getOpcode() == ISD::BUILD_VECTOR)
- V1 = N1->getOperand(I);
- else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
- V1 = N1->getOperand(0);
- else
- V1 = getUNDEF(SVT);
-
- SDValue V2;
- if (N2->getOpcode() == ISD::BUILD_VECTOR)
- V2 = N2->getOperand(I);
- else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
- V2 = N2->getOperand(0);
- else
- V2 = getUNDEF(SVT);
-
- if (SVT.isInteger()) {
- if (V1.getValueType().bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2.getValueType().bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ if (NumOps == 2) {
+ // TODO: Move foldConstantFPMath here?
+
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ Optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
}
- if (V1.getValueType() != SVT || V2.getValueType() != SVT)
- return SDValue();
-
- // Fold one vector element.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
- if (LegalSVT != SVT)
- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
-
- // Scalar folding only succeeded if the result is a constant or UNDEF.
- if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
- ScalarResult.getOpcode() != ISD::ConstantFP)
- return SDValue();
- Outputs.push_back(ScalarResult);
- }
-
- if (N1->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::BUILD_VECTOR) {
- assert(VT.getVectorNumElements() == Outputs.size() &&
- "Vector size mismatch!");
-
- // Build a big vector out of the scalar elements we generated.
- return getBuildVector(VT, SDLoc(), Outputs);
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+ if (TLI->isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
}
- assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR) &&
- "One operand should be a splat vector");
-
- assert(Outputs.size() == 1 && "Vector size mismatch!");
- return getSplatVector(VT, SDLoc(), Outputs[0]);
-}
-
-// TODO: Merge with FoldConstantArithmetic
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
- const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags) {
- // If the opcode is a target-specific ISD node, there's nothing we can
- // do here and the operand rules may not line up with the below, so
- // bail early.
- if (Opcode >= ISD::BUILTIN_OP_END)
- return SDValue();
-
- if (isUndef(Opcode, Ops))
- return getUNDEF(VT);
-
- // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ // This is for vector folding only from here on.
if (!VT.isVector())
return SDValue();
ElementCount NumElts = VT.getVectorElementCount();
+ // See if we can fold through bitcasted integer ops.
+ // TODO: Can we handle undef elements?
+ if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
+ Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+ Ops[0].getOpcode() == ISD::BITCAST &&
+ Ops[1].getOpcode() == ISD::BITCAST) {
+ SDValue N1 = peekThroughBitcasts(Ops[0]);
+ SDValue N2 = peekThroughBitcasts(Ops[1]);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ EVT BVVT = N1.getValueType();
+ if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+ bool IsLE = getDataLayout().isLittleEndian();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SmallVector<APInt> RawBits1, RawBits2;
+ BitVector UndefElts1, UndefElts2;
+ if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
+ UndefElts1.none() && UndefElts2.none()) {
+ SmallVector<APInt> RawBits;
+ for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
+ Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ if (!Fold)
+ break;
+ RawBits.push_back(Fold.getValue());
+ }
+ if (RawBits.size() == NumElts.getFixedValue()) {
+ // We have constant folded, but we need to cast this again back to
+ // the original (possibly legalized) type.
+ SmallVector<APInt> DstBits;
+ BitVector DstUndefs;
+ BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
+ DstBits, RawBits, DstUndefs,
+ BitVector(RawBits.size(), false));
+ EVT BVEltVT = BV1->getOperand(0).getValueType();
+ unsigned BVEltBits = BVEltVT.getSizeInBits();
+ SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
+ for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
+ if (DstUndefs[I])
+ continue;
+ Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+ }
+ return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
+ }
+ }
+ }
+ }
+
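The bitcast-folding path above works on raw lane bits rather than on SDNode operands. A standalone sketch of the idea (plain C++ with memcpy standing in for the bitcasts; the little-endian layout and the v2i32-as-v4i16 shapes are assumptions, and an ADD is used so that lane boundaries actually matter):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Two v2i32 constants are reinterpreted as v4i16 lanes, the op is folded
  // per 16-bit lane, and the result is reassembled as v2i32.
  uint32_t A[2] = {0x0001FFFF, 0x00020003};
  uint32_t B[2] = {0x00010001, 0x00050007};
  uint16_t ALanes[4], BLanes[4], RLanes[4];
  std::memcpy(ALanes, A, sizeof(A));
  std::memcpy(BLanes, B, sizeof(B));
  for (int I = 0; I != 4; ++I)
    RLanes[I] = uint16_t(ALanes[I] + BLanes[I]); // per-lane fold, no carry out
  uint32_t R[2];
  std::memcpy(R, RLanes, sizeof(R));
  // 0x00020000 0x0007000A: the low word differs from a plain 32-bit add.
  std::printf("0x%08X 0x%08X\n", (unsigned)R[0], (unsigned)R[1]);
}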
auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorElementCount() == NumElts;
};
- auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
- APInt SplatVal;
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
- (BV && BV->isConstant()) ||
- (Op.getOpcode() == ISD::SPLAT_VECTOR &&
- ISD::isConstantSplatVector(Op.getNode(), SplatVal));
+ Op.getOpcode() == ISD::BUILD_VECTOR ||
+ Op.getOpcode() == ISD::SPLAT_VECTOR;
};
// All operands must be vector types with the same number of elements as
- // the result type and must be either UNDEF or a build vector of constant
+ // the result type and must be either UNDEF or a build/splat vector
// or UNDEF scalars.
- if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
+ if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
@@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// For scalable vector types we know we're dealing with SPLAT_VECTORs. We
// only have one operand to check. For fixed-length vector types we may have
// a combination of BUILD_VECTOR and SPLAT_VECTOR.
- unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+ unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
- for (unsigned I = 0; I != NumOperands; I++) {
+ for (unsigned I = 0; I != NumVectorElts; I++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
Op.getOpcode() != ISD::SPLAT_VECTOR) {
- // We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
else
@@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N2;
- if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ if (N2C && N2C->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
@@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
@@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
break;
case ISD::FP_ROUND:
@@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
return V;
}
@@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::VECTOR_SPLICE: {
+ if (cast<ConstantSDNode>(N3)->isNullValue())
+ return N1;
+ break;
+ }
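VECTOR_SPLICE conceptually concatenates its two vector operands and extracts a result-sized window starting at the (non-negative) offset in the third operand, so an offset of zero simply returns the first operand, which is the fold added above. A small sketch of that behaviour on fixed-size arrays (illustrative only):

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Model of vector_splice(V1, V2, Offset): concatenate V1||V2, take N lanes
// starting at Offset.
template <std::size_t N>
static std::array<int32_t, N> splice(const std::array<int32_t, N> &V1,
                                     const std::array<int32_t, N> &V2,
                                     std::size_t Offset) {
  std::array<int32_t, N> R{};
  for (std::size_t I = 0; I != N; ++I) {
    std::size_t Src = I + Offset;
    R[I] = Src < N ? V1[Src] : V2[Src - N];
  }
  return R;
}

int main() {
  std::array<int32_t, 4> A{1, 2, 3, 4}, B{5, 6, 7, 8};
  assert(splice(A, B, 0) == A); // offset 0: identity on the first operand
  assert((splice(A, B, 1) == std::array<int32_t, 4>{2, 3, 4, 5}));
}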
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
@@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
ArgChains.push_back(Chain);
// Add a chain value for each stack argument.
- for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
- UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ for (SDNode *U : getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0)
ArgChains.push_back(SDValue(L, 1));
@@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
if (!TLI.findOptimalMemOpLowering(
MemOps, TLI.getMaxStoresPerMemset(OptSize),
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
@@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memcpy with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemcpyLoadsAndStores(
@@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memmove with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemmoveLoadsAndStores(
@@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memset with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
@@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,
+ MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
+ ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *LD = cast<VPLoadSDNode>(OrigLoad);
+  assert(LD->getOffset().isUndef() && "Load is already an indexed load!");

+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getMask(),
+ LD->getVectorLength(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),
+ nullptr, LD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, IsCompressing, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
+ return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
+ IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *ST = cast<VPStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,
+ Offset, ST->getMask(), ST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStoreSDNode>(
+ dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),
+ ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 7 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
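The VP_LOAD/VP_STORE nodes built above take both a per-lane mask and an explicit vector length (EVL) operand: only lanes below EVL and enabled by the mask access memory, while the remaining result lanes of a load are left unspecified and the remaining lanes of a store leave memory untouched. A scalar-loop model of that behaviour, assuming i32 elements purely for illustration:

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of a vector-predicated load: lanes >= EVL or with Mask[I] false
// do not access memory; their result values are unspecified (left as-is here).
static void vpLoad(std::vector<int32_t> &Dst, const int32_t *Ptr,
                   const std::vector<bool> &Mask, std::size_t EVL) {
  for (std::size_t I = 0; I != Dst.size(); ++I)
    if (I < EVL && Mask[I])
      Dst[I] = Ptr[I];
}

// Scalar model of a vector-predicated store: inactive lanes leave memory
// untouched.
static void vpStore(const std::vector<int32_t> &Src, int32_t *Ptr,
                    const std::vector<bool> &Mask, std::size_t EVL) {
  for (std::size_t I = 0; I != Src.size(); ++I)
    if (I < EVL && Mask[I])
      Ptr[I] = Src[I];
}

int main() {
  int32_t Mem[4] = {10, 20, 30, 40};
  std::vector<int32_t> V(4, -1);
  vpLoad(V, Mem, {true, false, true, true}, /*EVL=*/3);  // loads lanes 0 and 2
  vpStore({7, 7, 7, 7}, Mem, {true, true, true, true}, /*EVL=*/2); // lanes 0..1
}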
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Base, SDValue Offset, SDValue Mask,
SDValue PassThru, EVT MemVT,
@@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
// select true, T, F --> T
// select false, T, F --> F
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
- return CondC->isNullValue() ? F : T;
+ return CondC->isZero() ? F : T;
// TODO: This should simplify VSELECT with constant condition using something
// like this (but check boolean contents to be complete?):
@@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
}
#ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence() {
+void SelectionDAG::VerifyDAGDivergence() {
std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
for (auto *N : TopoOrder) {
@@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// before SortedPos will contain the topological sort index, and the
// Node Id fields for nodes At SortedPos and after will contain the
// count of outstanding operands.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = &*I++;
- checkForCycles(N, this);
- unsigned Degree = N->getNumOperands();
+ for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
+ checkForCycles(&N, this);
+ unsigned Degree = N.getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
- N->setNodeId(DAGSize++);
- allnodes_iterator Q(N);
+ N.setNodeId(DAGSize++);
+ allnodes_iterator Q(&N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Temporarily use the Node Id as scratch space for the degree count.
- N->setNodeId(Degree);
+ N.setNodeId(Degree);
}
}
@@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
std::string ErrorStr;
raw_string_ostream ErrorFormatter(ErrorStr);
-
ErrorFormatter << "Undefined external symbol ";
ErrorFormatter << '"' << Symbol << '"';
- ErrorFormatter.flush();
-
- report_fatal_error(ErrorStr);
+ report_fatal_error(Twine(ErrorFormatter.str()));
}
//===----------------------------------------------------------------------===//
@@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
bool llvm::isNullConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
+ return Const != nullptr && Const->isZero();
}
bool llvm::isNullFPConstant(SDValue V) {
@@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) {
bool llvm::isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
+ return Const != nullptr && Const->isAllOnes();
}
bool llvm::isOneConstant(SDValue V) {
@@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
ConstantSDNode *C =
isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);
- return C && C->isNullValue();
+ return C && C->isZero();
}
bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
@@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
- return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+ return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;
}
HandleSDNode::~HandleSDNode() {
@@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
/// isOnlyUserOf - Return true if this node is the only use of N.
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (User == this)
Seen = true;
else
@@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
/// Return true if the only users of N are contained in Nodes.
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (llvm::is_contained(Nodes, User))
Seen = true;
else
@@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
"Mixing fixed width and scalable vectors when enveloping a type");
EVT LoVT, HiVT;
if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
- LoVT = EnvVT;
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
*HiIsEmpty = false;
} else {
// Flag that hi type has zero storage size, but return split envelop type
// (this would be easier if vector types with zero elements were allowed).
LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
- HiVT = EnvVT;
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
*HiIsEmpty = true;
}
return std::make_pair(LoVT, HiVT);
@@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
}
SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getSplatValue(DemandedElts, UndefElements);
}
@@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
}
@@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
return -1;
}
+bool BuildVectorSDNode::getConstantRawBits(
+ bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {
+ // Early-out if this contains anything but Undef/Constant/ConstantFP.
+ if (!isConstant())
+ return false;
+
+ unsigned NumSrcOps = getNumOperands();
+ unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+
+ // Extract raw src bits.
+ SmallVector<APInt> SrcBitElements(NumSrcOps,
+ APInt::getNullValue(SrcEltSizeInBits));
+  BitVector SrcUndefElements(NumSrcOps, false);
+
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+      SrcUndefElements.set(I);
+ continue;
+ }
+ auto *CInt = dyn_cast<ConstantSDNode>(Op);
+ auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
+ assert((CInt || CFP) && "Unknown constant");
+ SrcBitElements[I] =
+ CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
+ }
+
+ // Recast to dst width.
+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,
+                SrcBitElements, UndefElements, SrcUndefElements);
+ return true;
+}
+
+void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
+ unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements) {
+ unsigned NumSrcOps = SrcBitElements.size();
+ unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+ assert(NumSrcOps == SrcUndefElements.size() &&
+ "Vector size mismatch");
+
+ unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
+ DstUndefElements.clear();
+ DstUndefElements.resize(NumDstOps, false);
+ DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));
+
+  // Concatenate the src elements' constant bits together into each dst element.
+ if (SrcEltSizeInBits <= DstEltSizeInBits) {
+ unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;
+ for (unsigned I = 0; I != NumDstOps; ++I) {
+ DstUndefElements.set(I);
+ APInt &DstBits = DstBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ if (SrcUndefElements[Idx])
+ continue;
+ DstUndefElements.reset(I);
+ const APInt &SrcBits = SrcBitElements[Idx];
+ assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&
+ "Illegal constant bitwidths");
+ DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);
+ }
+ }
+ return;
+ }
+
+  // Split each src element's constant bits across the dst elements.
+ unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ if (SrcUndefElements[I]) {
+ DstUndefElements.set(I * Scale, (I + 1) * Scale);
+ continue;
+ }
+ const APInt &SrcBits = SrcBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ APInt &DstBits = DstBitElements[Idx];
+ DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);
+ }
+ }
+}
+
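recastRawBits regroups the raw constant bits of the source lanes into lanes of the destination width, with endianness deciding which source lane supplies the low bits, and a destination lane marked undef only when every contributing source lane is undef. A minimal sketch of the widening direction, two i8 lanes per i16 lane, on plain integers (illustrative only):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Concatenate pairs of 8-bit lanes into 16-bit lanes. On little-endian targets
// the first source lane of each pair supplies the low bits; on big-endian, the
// high bits.
static std::vector<uint16_t> recast8To16(const std::vector<uint8_t> &Src,
                                         bool IsLittleEndian) {
  std::vector<uint16_t> Dst(Src.size() / 2, 0);
  for (std::size_t I = 0; I != Dst.size(); ++I) {
    for (std::size_t J = 0; J != 2; ++J) {
      std::size_t Idx = I * 2 + (IsLittleEndian ? J : 1 - J);
      Dst[I] |= static_cast<uint16_t>(Src[Idx]) << (J * 8);
    }
  }
  return Dst;
}

int main() {
  std::vector<uint8_t> Src = {0x12, 0x34, 0x56, 0x78};
  assert(recast8To16(Src, /*IsLittleEndian=*/true)[0] == 0x3412);
  assert(recast8To16(Src, /*IsLittleEndian=*/false)[0] == 0x1234);
}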
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 20c7d771bfb6..6d8252046501 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include <cstdint>
@@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
- IsAlias = false;
- return true;
+ if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+    // We can derive NoAlias in case of mismatched base types.
+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) {
+ IsAlias = false;
+ return true;
+ }
+ if (IsGV0 && IsGV1) {
+ auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal();
+ auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal();
+      // It doesn't make sense to access one global value using another
+      // global's address, so we can assume that there is no aliasing in case of
+ // two different globals (unless we have symbols that may indirectly point
+ // to each other).
+ // FIXME: This is perhaps a bit too defensive. We could try to follow the
+      // chain with aliasee information for GlobalAlias variables to find out
+      // whether the indirect symbols may alias or not.
+ if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) {
+ IsAlias = false;
+ return true;
+ }
+ }
}
return false; // Cannot determine whether the pointers alias.
}
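The restructured check above only concludes NoAlias when both base pointers are checkable (frame index, global value, or constant pool entry): bases of different kinds cannot overlap, and two distinct globals cannot overlap unless one of them is a GlobalAlias that might refer to the other. A condensed sketch of that decision with the SDNode machinery replaced by plain flags (names are illustrative only):

#include <cassert>

struct BaseInfo {
  bool IsFrameIndex = false, IsGlobal = false, IsConstPool = false;
  const void *Global = nullptr; // identity of the global, if any
  bool IsGlobalAlias = false;   // aliases may indirectly refer elsewhere
};

// Returns true when the two bases are provably NoAlias; false means "unknown".
static bool provablyNoAlias(const BaseInfo &A, const BaseInfo &B) {
  bool ACheckable = A.IsFrameIndex || A.IsGlobal || A.IsConstPool;
  bool BCheckable = B.IsFrameIndex || B.IsGlobal || B.IsConstPool;
  if (!ACheckable || !BCheckable)
    return false;
  // Mismatched base kinds cannot alias.
  if (A.IsFrameIndex != B.IsFrameIndex || A.IsGlobal != B.IsGlobal ||
      A.IsConstPool != B.IsConstPool)
    return true;
  // Two different plain globals cannot alias; GlobalAlias stays conservative.
  if (A.IsGlobal && B.IsGlobal)
    return A.Global != B.Global && !A.IsGlobalAlias && !B.IsGlobalAlias;
  return false;
}

int main() {
  BaseInfo FI, GV;
  FI.IsFrameIndex = true;
  GV.IsGlobal = true;
  assert(provablyNoAlias(FI, GV)); // stack slot vs. global: disjoint
}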
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index bd2ebfd0bd3b..5d911c165293 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
if (PartEVT.isVector()) {
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
- if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getVectorIdxConstant(0, DL));
+ PartEVT =
+ EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
+ if (PartEVT == ValueVT)
+ return Val;
}
- // Vector/Vector bitcast.
- if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
-
- assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
- "Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
-
}
// Trivial bitcast if the types are the same size and the destination
@@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else if (PartEVT.isVector() &&
+ PartEVT.getVectorElementType() !=
+ ValueVT.getVectorElementType() &&
+ TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
+ TargetLowering::TypeWidenVector) {
+ // Combination of widening and promotion.
+ EVT WidenVT =
+ EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
+ PartVT.getVectorElementCount());
+ SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
+ Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
@@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
- } else if (SDValue Widened =
- widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
- Val = Widened;
- } else if (BuiltVectorTy.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType()) &&
- BuiltVectorTy.getVectorElementCount() ==
- ValueVT.getVectorElementCount()) {
- // Promoted vector extract
- Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
+ } else {
+ if (BuiltVectorTy.getVectorElementType().bitsGT(
+ ValueVT.getVectorElementType())) {
+ // Integer promotion.
+ ValueVT = EVT::getVectorVT(*DAG.getContext(),
+ BuiltVectorTy.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+ Val = Widened;
+ }
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
@@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
+ SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
- DIExpression *SalvagedExpr =
- salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
-
+ V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
+ if (!V)
+ break;
+
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
- if (!SalvagedExpr || !AdditionalValues.empty())
+ if (!AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
- V = VAsInst.getOperand(0);
- Expr = SalvagedExpr;
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
@@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
@@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::SExt))
+ if (F->getAttributes().hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::ZExt))
+ else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(
- AttributeList::ReturnIndex, Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
- Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ Outs.push_back(ISD::OutputArg(Flags,
+ Parts[i].getValueType().getSimpleVT(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
@@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
- Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
- EVT(TLI.getPointerTy(DL)) /*argvt*/,
- true /*isfixed*/, 1 /*origidx*/,
- 0 /*partOffs*/));
+ Outs.push_back(ISD::OutputArg(
+ Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
+ /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
@@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
- if (!JTH.OmitRangeCheck) {
+ if (!JTH.FallthroughUnreachable) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
@@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
- if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
@@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
@@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
+ else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
@@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
@@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
- AAInfo))) &&
+ I.getAAMetadata()))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
@@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
@@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
@@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
@@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
@@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Info.align, Info.flags, Info.size,
+ I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
- if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
+ if (Reg.isVirtual() && MF.useDebugInstrRef()) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
- MIB.addReg(Reg, RegState::Debug);
+ MIB.addReg(Reg);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
@@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
- for (auto RegAndSize : SplitRegs) {
+ for (const auto &RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
@@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
- MachinePointerInfo(I.getArgOperand(0)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(0)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
@@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MM);
return;
}
@@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Values.empty())
return;
- if (std::count(Values.begin(), Values.end(), nullptr))
+ if (llvm::is_contained(Values, nullptr))
return;
bool IsVariadic = DI.hasArgList();
@@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
- I.getAttributes()
- .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
- .getValueAsString();
+ I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
@@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
};
SmallVector<BranchFunnelTarget, 8> Targets;
- for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
@@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
+ if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
+ *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
+ if (VPIntrin.getFastMathFlags().allowReassoc())
+ return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
+ : ISD::VP_REDUCE_FMUL;
+ }
+
return ResOPC.getValue();
}
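The added check relaxes the sequential (ordered) vp.reduce.fadd/fmul forms to the unordered VP_REDUCE_FADD/FMUL nodes only when the call site carries the reassoc fast-math flag, because the two evaluation orders round differently in general. A small sketch with plain floats showing why the substitution needs reassoc (illustrative only):

#include <cstddef>
#include <cstdio>
#include <vector>

// Ordered (sequential) reduction: ((Start + V0) + V1) + ...
static float reduceSeqFAdd(float Start, const std::vector<float> &V) {
  float Acc = Start;
  for (float X : V)
    Acc += X;
  return Acc;
}

// Unordered reduction: pairwise tree, only a valid substitute under 'reassoc'.
static float reduceTreeFAdd(float Start, const std::vector<float> &V) {
  std::vector<float> W = V;
  while (W.size() > 1) {
    std::vector<float> Next;
    for (std::size_t I = 0; I + 1 < W.size(); I += 2)
      Next.push_back(W[I] + W[I + 1]);
    if (W.size() % 2)
      Next.push_back(W.back());
    W = Next;
  }
  return Start + (W.empty() ? 0.0f : W[0]);
}

int main() {
  std::vector<float> V = {1e8f, 1.0f, -1e8f, 1.0f};
  // The two orders accumulate different rounding error: 1 vs 0 here.
  std::printf("%g vs %g\n", reduceSeqFAdd(0.0f, V), reduceTreeFAdd(0.0f, V));
}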
+void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isGather) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ bool AddToChain = true;
+ if (!isGather) {
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ MemoryLocation ML;
+ if (VT.isScalableVector())
+ ML = MemoryLocation::getAfter(PtrOperand);
+ else
+ ML = MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),
+ AAInfo);
+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ }
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isScatter) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!isScatter) {
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ ST =
+ DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
+ }
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
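For the gather and scatter forms, getUniformBase splits the pointer operand into a uniform base plus a per-lane index and constant scale, falling back to a zero base with the whole pointer vector as the index; lane I then addresses Base + Index[I] * Scale, gated by the mask and EVL as before. A scalar model of a vector-predicated gather under those assumptions (illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of a vector-predicated gather:
//   Dst[I] = *(int32_t *)(Base + Index[I] * Scale)   for active lanes only.
static void vpGather(std::vector<int32_t> &Dst, const char *Base,
                     const std::vector<int64_t> &Index, int64_t Scale,
                     const std::vector<bool> &Mask, std::size_t EVL) {
  for (std::size_t I = 0; I != Dst.size(); ++I)
    if (I < EVL && Mask[I])
      Dst[I] = *reinterpret_cast<const int32_t *>(Base + Index[I] * Scale);
}

int main() {
  int32_t Table[8] = {0, 10, 20, 30, 40, 50, 60, 70};
  std::vector<int32_t> R(4, -1);
  // Gather lanes 0..2 of Table[{5, 1, 3, 7}], scaled by sizeof(int32_t).
  vpGather(R, reinterpret_cast<const char *>(Table), {5, 1, 3, 7},
           sizeof(int32_t), {true, true, true, true}, /*EVL=*/3);
  // R == {50, 10, 30, -1}
}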
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
@@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
// Request operands.
SmallVector<SDValue, 7> OpValues;
- for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
+ for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
- setValue(&VPIntrin, Result);
+ switch (Opcode) {
+ default: {
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+ break;
+ }
+ case ISD::VP_LOAD:
+ case ISD::VP_GATHER:
+ visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
+ Opcode == ISD::VP_GATHER);
+ break;
+ case ISD::VP_STORE:
+ case ISD::VP_SCATTER:
+ visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
+ break;
+ }
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
@@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
@@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
if (Function *F = I.getCalledFunction()) {
+ diagnoseDontCall(I);
+
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
@@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
- SDISelAsmOperandInfo &OpInfo,
- SDISelAsmOperandInfo &RefOpInfo) {
+static llvm::Optional<unsigned>
+getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No work to do for memory operations.
if (OpInfo.ConstraintType == TargetLowering::C_Memory)
- return;
+ return None;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
@@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
- return;
+ return None;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
@@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
- return;
+ return None;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
@@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// Do not check for single registers.
if (AssignedReg) {
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "AssignedReg should be member of RC");
+ I = std::find(I, RC->end(), AssignedReg);
+ if (I == RC->end()) {
+ // RC does not contain the selected register, which indicates a
+ // mismatch between the register and the required type/bitwidth.
+ return {AssignedReg};
+ }
}
for (; NumRegs; --NumRegs, ++I) {
@@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return None;
}
static unsigned
@@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
if (isa<CallBrInst>(Call) &&
- ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -
cast<CallBrInst>(&Call)->getNumIndirectDests() -
NumMatchingOps) &&
(NumMatchingOps == 0 ||
- ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
- NumMatchingOps))) {
+ ArgNo - 1 <
+ (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
@@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ const auto RegError =
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ if (RegError.hasValue()) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const char *RegName = TRI.getName(RegError.getValue());
+ emitInlineAsmError(Call, "register '" + Twine(RegName) +
+ "' allocated for constraint '" +
+ Twine(OpInfo.ConstraintCode) +
+ "' does not match required type");
+ return;
+ }
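
This turns a previous assertion into a proper diagnostic when the register picked for a constraint cannot hold the constrained value. A hypothetical, target-dependent sketch of code that can end up here (whether it actually produces the message depends on the target and on earlier front-end checks):

    // Assumed x86-64: %eax is 32 bits wide, but the "=r" output is a 64-bit
    // 'long', so the register class chosen for the constraint does not contain
    // the explicitly requested register. ('register' may warn under C++17.)
    void demo() {
      register long v asm("eax");   // pin the value to a specific register
      asm volatile("" : "=r"(v));
      (void)v;
    }
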
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
@@ -8674,7 +8822,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
+ auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
const TargetRegisterClass *RC =
@@ -9319,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
- if (I.getNumArgOperands() > 1)
+ if (I.arg_size() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -9673,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// if it isn't first piece, alignment must be 1
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
- i < CLI.NumFixedArgs, i,
- j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
+ ISD::OutputArg MyFlags(
+ Flags, Parts[j].getValueType().getSimpleVT(), VT,
+ i < CLI.NumFixedArgs, i,
+ j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9843,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
- ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
- FuncInfo.PreferredExtendType.end())
- ? ISD::ANY_EXTEND
- : FuncInfo.PreferredExtendType[V];
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
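
The rewritten lookup above reads the preferred extend kind with a single find() instead of a find() followed by operator[]. A general C++ sketch of the idiom (not LLVM code):

    #include <map>

    // One lookup, no accidental insertion: operator[] default-constructs a
    // value for missing keys, while find() leaves the container untouched.
    int preferredOrDefault(const std::map<const void *, int> &M,
                           const void *Key, int Default) {
      auto It = M.find(Key);
      return It == M.end() ? Default : It->second;
    }
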
@@ -10492,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
ConstantsOut.clear();
}
-/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB
-/// is 0.
-MachineBasicBlock *
-SelectionDAGBuilder::StackProtectorDescriptor::
-AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB) {
- // If SuccBB has not been created yet, create it.
- if (!SuccMBB) {
- MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI(ParentMBB);
- SuccMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(++BBI, SuccMBB);
- }
- // Add it as a successor of ParentMBB.
- ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
- return SuccMBB;
-}
-
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
@@ -10677,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- JTH->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -10720,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index df5be156821f..d6122aa0a739 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
@@ -180,204 +181,6 @@ private:
SwitchCG::CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb);
- /// A class which encapsulates all of the information needed to generate a
- /// stack protector check and signals to isel via its state being initialized
- /// that a stack protector needs to be generated.
- ///
- /// *NOTE* The following is a high level documentation of SelectionDAG Stack
- /// Protector Generation. The reason that it is placed here is for a lack of
- /// other good places to stick it.
- ///
- /// High Level Overview of SelectionDAG Stack Protector Generation:
- ///
- /// Previously, generation of stack protectors was done exclusively in the
- /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
- /// splitting basic blocks at the IR level to create the success/failure basic
- /// blocks in the tail of the basic block in question. As a result of this,
- /// calls that would have qualified for the sibling call optimization were no
- /// longer eligible for optimization since said calls were no longer right in
- /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
- /// instruction).
- ///
- /// Then it was noticed that since the sibling call optimization causes the
- /// callee to reuse the caller's stack, if we could delay the generation of
- /// the stack protector check until later in CodeGen after the sibling call
- /// decision was made, we get both the tail call optimization and the stack
- /// protector check!
- ///
- /// A few goals in solving this problem were:
- ///
- /// 1. Preserve the architecture independence of stack protector generation.
- ///
- /// 2. Preserve the normal IR level stack protector check for platforms like
- /// OpenBSD for which we support platform-specific stack protector
- /// generation.
- ///
- /// The main problem that guided the present solution is that one can not
- /// solve this problem in an architecture independent manner at the IR level
- /// only. This is because:
- ///
- /// 1. The decision on whether or not to perform a sibling call on certain
- /// platforms (for instance i386) requires lower level information
- /// related to available registers that can not be known at the IR level.
- ///
- /// 2. Even if the previous point were not true, the decision on whether to
- /// perform a tail call is done in LowerCallTo in SelectionDAG which
- /// occurs after the Stack Protector Pass. As a result, one would need to
- /// put the relevant callinst into the stack protector check success
- /// basic block (where the return inst is placed) and then move it back
- /// later at SelectionDAG/MI time before the stack protector check if the
- /// tail call optimization failed. The MI level option was nixed
- /// immediately since it would require platform-specific pattern
- /// matching. The SelectionDAG level option was nixed because
- /// SelectionDAG only processes one IR level basic block at a time
- /// implying one could not create a DAG Combine to move the callinst.
- ///
- /// To get around this problem a few things were realized:
- ///
- /// 1. While one can not handle multiple IR level basic blocks at the
- /// SelectionDAG Level, one can generate multiple machine basic blocks
- /// for one IR level basic block. This is how we handle bit tests and
- /// switches.
- ///
- /// 2. At the MI level, tail calls are represented via a special return
- /// MIInst called "tcreturn". Thus if we know the basic block in which we
- /// wish to insert the stack protector check, we get the correct behavior
- /// by always inserting the stack protector check right before the return
- /// statement. This is a "magical transformation" since no matter where
- /// the stack protector check intrinsic is, we always insert the stack
- /// protector check code at the end of the BB.
- ///
- /// Given the aforementioned constraints, the following solution was devised:
- ///
- /// 1. On platforms that do not support SelectionDAG stack protector check
- /// generation, allow for the normal IR level stack protector check
- /// generation to continue.
- ///
- /// 2. On platforms that do support SelectionDAG stack protector check
- /// generation:
- ///
- /// a. Use the IR level stack protector pass to decide if a stack
- /// protector is required/which BB we insert the stack protector check
- /// in by reusing the logic already therein. If we wish to generate a
- /// stack protector check in a basic block, we place a special IR
- /// intrinsic called llvm.stackprotectorcheck right before the BB's
- /// returninst or if there is a callinst that could potentially be
- /// sibling call optimized, before the call inst.
- ///
- /// b. Then when a BB with said intrinsic is processed, we codegen the BB
- /// normally via SelectBasicBlock. In said process, when we visit the
- /// stack protector check, we do not actually emit anything into the
- /// BB. Instead, we just initialize the stack protector descriptor
- /// class (which involves stashing information/creating the success
- /// mbbb and the failure mbb if we have not created one for this
- /// function yet) and export the guard variable that we are going to
- /// compare.
- ///
- /// c. After we finish selecting the basic block, in FinishBasicBlock if
- /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
- /// initialized, we produce the validation code with one of these
- /// techniques:
- /// 1) with a call to a guard check function
- /// 2) with inlined instrumentation
- ///
- /// 1) We insert a call to the check function before the terminator.
- ///
- /// 2) We first find a splice point in the parent basic block
- /// before the terminator and then splice the terminator of said basic
- /// block into the success basic block. Then we code-gen a new tail for
- /// the parent basic block consisting of the two loads, the comparison,
- /// and finally two branches to the success/failure basic blocks. We
- /// conclude by code-gening the failure basic block if we have not
- /// code-gened it already (all stack protector checks we generate in
- /// the same function, use the same failure basic block).
- class StackProtectorDescriptor {
- public:
- StackProtectorDescriptor() = default;
-
- /// Returns true if all fields of the stack protector descriptor are
- /// initialized implying that we should/are ready to emit a stack protector.
- bool shouldEmitStackProtector() const {
- return ParentMBB && SuccessMBB && FailureMBB;
- }
-
- bool shouldEmitFunctionBasedCheckStackProtector() const {
- return ParentMBB && !SuccessMBB && !FailureMBB;
- }
-
- /// Initialize the stack protector descriptor structure for a new basic
- /// block.
- void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
- bool FunctionBasedInstrumentation) {
- // Make sure we are not initialized yet.
- assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
- "already initialized!");
- ParentMBB = MBB;
- if (!FunctionBasedInstrumentation) {
- SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
- FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
- }
- }
-
- /// Reset state that changes when we handle different basic blocks.
- ///
- /// This currently includes:
- ///
- /// 1. The specific basic block we are generating a
- /// stack protector for (ParentMBB).
- ///
- /// 2. The successor machine basic block that will contain the tail of
- /// parent mbb after we create the stack protector check (SuccessMBB). This
- /// BB is visited only on stack protector check success.
- void resetPerBBState() {
- ParentMBB = nullptr;
- SuccessMBB = nullptr;
- }
-
- /// Reset state that only changes when we switch functions.
- ///
- /// This currently includes:
- ///
- /// 1. FailureMBB since we reuse the failure code path for all stack
- /// protector checks created in an individual function.
- ///
- /// 2.The guard variable since the guard variable we are checking against is
- /// always the same.
- void resetPerFunctionState() {
- FailureMBB = nullptr;
- }
-
- MachineBasicBlock *getParentMBB() { return ParentMBB; }
- MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
- MachineBasicBlock *getFailureMBB() { return FailureMBB; }
-
- private:
- /// The basic block for which we are generating the stack protector.
- ///
- /// As a result of stack protector generation, we will splice the
- /// terminators of this basic block into the successor mbb SuccessMBB and
- /// replace it with a compare/branch to the successor mbbs
- /// SuccessMBB/FailureMBB depending on whether or not the stack protector
- /// was violated.
- MachineBasicBlock *ParentMBB = nullptr;
-
- /// A basic block visited on stack protector check success that contains the
- /// terminators of ParentMBB.
- MachineBasicBlock *SuccessMBB = nullptr;
-
- /// This basic block visited on stack protector check failure that will
- /// contain a call to __stack_chk_fail().
- MachineBasicBlock *FailureMBB = nullptr;
-
- /// Add a successor machine basic block to ParentMBB. If the successor mbb
- /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
- /// block will be created. Assign a large weight if IsLikely is true.
- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB = nullptr);
- };
-
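
The descriptor and its design note are not gone: SelectionDAGBuilder.h now includes llvm/CodeGen/CodeGenCommonISel.h (see the hunk above), which is presumably where they move so the logic can be shared outside SelectionDAG. The check the descriptor ultimately drives looks roughly like this sketch (illustrative C++; the real thing is emitted as MachineIR on the success/failure blocks described above):

    extern "C" void __stack_chk_fail();          // shared failure-path target
    extern "C" unsigned long __stack_chk_guard;  // guard value, platform-dependent

    // ParentMBB tail: reload the guard and the stack-slot copy, compare, branch.
    inline void stack_protector_check(unsigned long GuardSlot) {
      if (GuardSlot != __stack_chk_guard)        // FailureMBB, reused per function
        __stack_chk_fail();                      // does not return
      // SuccessMBB: the original terminator (return or tail call) runs here.
    }
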
private:
const TargetMachine &TM;
@@ -764,6 +567,10 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues, bool isGather);
+ void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues, bool isScatter);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 40083c614a6c..77e9e53668f9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
- else if (!G)
+ if (!G)
return "Unknown intrinsic";
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
}
@@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
if (G) {
const MachineFunction *MF = &G->getMachineFunction();
return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
- &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
- *G->getContext());
- } else {
- LLVMContext Ctx;
- return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
- /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ &MF->getFrameInfo(),
+ G->getSubtarget().getInstrInfo(), *G->getContext());
}
+
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
if (!Value.getNode()) {
OS << "<null>";
return false;
- } else if (shouldPrintInline(*Value.getNode(), G)) {
+ }
+
+ if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
return true;
- } else {
- OS << PrintNodeId(*Value.getNode());
- if (unsigned RN = Value.getResNo())
- OS << ':' << RN;
- return false;
}
+
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
N->print(OS, G);
- if (depth < 1)
- return;
-
for (const SDValue &Op : N->op_values()) {
// Don't follow chain operands.
if (Op.getValueType() == MVT::Other)
continue;
OS << '\n';
- printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
+ printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 09627ee6a164..c7e37cf8ca14 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
- bool InstrRef = TM.Options.ValueTrackingVariableLocations;
+ bool InstrRef = MF->useDebugInstrRef();
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
@@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (ShouldAbort)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
}
@@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine1 && MatchFilterBB)
@@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Second step, hack on the DAG until it only uses operations and types that
@@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Only allow creation of legal node types.
@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
{
@@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombineLT && MatchFilterBB)
@@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine2 && MatchFilterBB)
@@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (OptLevel != CodeGenOpt::None)
@@ -1045,25 +1046,25 @@ public:
} // end anonymous namespace
// This function is used to enforce the topological node id property
-// property leveraged during Instruction selection. Before selection all
-// nodes are given a non-negative id such that all nodes have a larger id than
+// leveraged during instruction selection. Before the selection process all
+// nodes are given a non-negative id such that all nodes have a greater id than
// their operands. As this holds transitively we can prune checks that a node N
// is a predecessor of M another by not recursively checking through M's
-// operands if N's ID is larger than M's ID. This is significantly improves
-// performance of for various legality checks (e.g. IsLegalToFold /
-// UpdateChains).
-
-// However, when we fuse multiple nodes into a single node
-// during selection we may induce a predecessor relationship between inputs and
-// outputs of distinct nodes being merged violating the topological property.
-// Should a fused node have a successor which has yet to be selected, our
-// legality checks would be incorrect. To avoid this we mark all unselected
-// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x =>
+// operands if N's ID is larger than M's ID. This significantly improves
+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during the
+// selection we may induce a predecessor relationship between inputs and
+// outputs of distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected,
+// our legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>
// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.
// We use bit-negation to more clearly enforce that node id -1 can only be
-// achieved by selected nodes). As the conversion is reversable the original Id,
-// topological pruning can still be leveraged when looking for unselected nodes.
-// This method is call internally in all ISel replacement calls.
+// achieved by selected nodes. As the conversion is reversible to the original
+// Id, topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally in all ISel replacement-related
+// functions.
void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
SmallVector<SDNode *, 4> Nodes;
Nodes.push_back(Node);
@@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
}
}
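
The id encoding described in the comment above, x => -(x + 1), is its own inverse and always yields a negative value for a non-negative id, which is what makes invalidated ids both easy to detect and easy to restore. A standalone check of those two properties:

    #include <cassert>

    constexpr int invalidate(int Id) { return -(Id + 1); }

    int main() {
      for (int Id = 0; Id < 1000; ++Id) {
        assert(invalidate(Id) < 0);               // marked ids are negative
        assert(invalidate(invalidate(Id)) == Id); // and fully reversible
      }
      return 0;
    }
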
-// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a
+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
// NodeId with the equivalent node id which is invalid for topological
// pruning.
void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
@@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
bool IsSingleCatchAllClause =
CPI->getNumArgOperands() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
- if (!IsSingleCatchAllClause) {
+ // catchpads for longjmp use an empty type list, e.g. catchpad within %0 []
+ // and they don't need LSDA info
+ bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+ if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
for (const User *U : CPI->users()) {
@@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
SDB->SPDescriptor.resetPerFunctionState();
}
-/// Given that the input MI is before a partial terminator sequence TSeq, return
-/// true if M + TSeq also a partial terminator sequence.
-///
-/// A Terminator sequence is a sequence of MachineInstrs which at this point in
-/// lowering copy vregs into physical registers, which are then passed into
-/// terminator instructors so we can satisfy ABI constraints. A partial
-/// terminator sequence is an improper subset of a terminator sequence (i.e. it
-/// may be the whole terminator sequence).
-static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
- // If we do not have a copy or an implicit def, we return true if and only if
- // MI is a debug value.
- if (!MI.isCopy() && !MI.isImplicitDef())
- // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
- // physical registers if there is debug info associated with the terminator
- // of our mbb. We want to include said debug info in our terminator
- // sequence, so we return true in that case.
- return MI.isDebugInstr();
-
- // We have left the terminator sequence if we are not doing one of the
- // following:
- //
- // 1. Copying a vreg into a physical register.
- // 2. Copying a vreg into a vreg.
- // 3. Defining a register via an implicit def.
-
- // OPI should always be a register definition...
- MachineInstr::const_mop_iterator OPI = MI.operands_begin();
- if (!OPI->isReg() || !OPI->isDef())
- return false;
-
- // Defining any register via an implicit def is always ok.
- if (MI.isImplicitDef())
- return true;
-
- // Grab the copy source...
- MachineInstr::const_mop_iterator OPI2 = OPI;
- ++OPI2;
- assert(OPI2 != MI.operands_end()
- && "Should have a copy implying we should have 2 arguments.");
-
- // Make sure that the copy dest is not a vreg when the copy source is a
- // physical register.
- if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
- Register::isPhysicalRegister(OPI2->getReg())))
- return false;
-
- return true;
-}
-
-/// Find the split point at which to splice the end of BB into its success stack
-/// protector check machine basic block.
-///
-/// On many platforms, due to ABI constraints, terminators, even before register
-/// allocation, use physical registers. This creates an issue for us since
-/// physical registers at this point can not travel across basic
-/// blocks. Luckily, selectiondag always moves physical registers into vregs
-/// when they enter functions and moves them through a sequence of copies back
-/// into the physical registers right before the terminator creating a
-/// ``Terminator Sequence''. This function is searching for the beginning of the
-/// terminator sequence so that we can ensure that we splice off not just the
-/// terminator, but additionally the copies that move the vregs into the
-/// physical registers.
-static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB,
- const TargetInstrInfo &TII) {
- MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- if (SplitPoint == BB->begin())
- return SplitPoint;
-
- MachineBasicBlock::iterator Start = BB->begin();
- MachineBasicBlock::iterator Previous = SplitPoint;
- --Previous;
-
- if (TII.isTailCall(*SplitPoint) &&
- Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // Call frames cannot be nested, so if this frame is describing the tail
- // call itself, then we must insert before the sequence even starts. For
- // example:
- // <split point>
- // ADJCALLSTACKDOWN ...
- // <Moves>
- // ADJCALLSTACKUP ...
- // TAILJMP somewhere
- // On the other hand, it could be an unrelated call in which case this tail call
- // has to register moves of its own and should be the split point. For example:
- // ADJCALLSTACKDOWN
- // CALL something_else
- // ADJCALLSTACKUP
- // <split point>
- // TAILJMP somewhere
- do {
- --Previous;
- if (Previous->isCall())
- return SplitPoint;
- } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
-
- return Previous;
- }
-
- while (MIIsInTerminatorSequence(*Previous)) {
- SplitPoint = Previous;
- if (Previous == Start)
- break;
- --Previous;
- }
-
- return SplitPoint;
-}
-
void
SelectionDAGISel::FinishBasicBlock() {
LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
@@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
- // Second-to-last bit-test with contiguous range: fall through to the
- // target of the final bit test.
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range or omitted range
+ // check: fall through to the target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
} else if (j + 1 == ej) {
// For the last bit test, fall through to Default.
@@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->clear();
CodeGenAndEmitDAG();
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Since we're not going to use the final bit test, remove it.
BTB.Cases.pop_back();
break;
@@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
else
Msg << "unknown intrinsic #" << iid;
}
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
char SelectionDAGISel::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index a903c2401264..e2db9633bfb9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
StatepointLoweringInfo SI(DAG);
unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
- SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
false);
if (!VarArgDisallowed)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7f80ce37e28a..e4a69adff05b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
@@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
@@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
@@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ // If we don't demand the inserted subvector, return the base vector.
+ if (DemandedSubElts == 0)
return Vec;
+ // If this simply widens the lowest subvector, see if we can do it earlier.
+ if (Idx == 0 && Vec.isUndef()) {
+ if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), NewSub, Op.getOperand(2));
+ }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
@@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
- APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
@@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
- DemandedBits = APInt::getAllOnesValue(BitWidth);
- DemandedElts = APInt::getAllOnesValue(NumElts);
+ DemandedBits = APInt::getAllOnes(BitWidth);
+ DemandedElts = APInt::getAllOnes(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
@@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
+ !DemandedSrcElts.isAllOnes()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
@@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
@@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue() &&
- DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
@@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If we can't turn this into a 'not', try to shrink the constant.
- if (!C || !C->isAllOnesValue())
+ if (!C || !C->isAllOnes())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
@@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedBits.isOneValue())
+ if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
@@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
@@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits =
- TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
+ unsigned MinSignedBits =
+ TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);
+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
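
The rewritten test is the same condition as before, assuming the minimum signed bit count is defined as BitWidth - NumSignBits + 1 (which is how ComputeMinSignedBits is understood here). A quick standalone check of that equivalence, plain C++ rather than LLVM code:

    #include <cassert>

    int main() {
      for (unsigned BitWidth = 1; BitWidth <= 64; ++BitWidth)
        for (unsigned ExVTBits = 1; ExVTBits <= BitWidth; ++ExVTBits)
          for (unsigned NumSignBits = 1; NumSignBits <= BitWidth; ++NumSignBits) {
            unsigned MinSignedBits = BitWidth - NumSignBits + 1;
            bool OldForm = NumSignBits >= BitWidth - ExVTBits + 1;
            bool NewForm = ExVTBits >= MinSignedBits;
            assert(OldForm == NewForm);
          }
      return 0;
    }
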
@@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Demand the bits from every vector element without a constant index.
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
@@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcBits.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
@@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
@@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
- if (C && !C->isAllOnesValue() && !C->isOne() &&
- (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ if (C && !C->isAllOnes() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnes()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
return SDValue();
};
- APInt KnownUndef = APInt::getNullValue(NumElts);
+ APInt KnownUndef = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
@@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
@@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
- APInt SrcZero, SrcUndef;
- APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+ APInt SrcDemandedElts, SrcZero, SrcUndef;
// Bitcast from 'large element' src vector to 'small element' vector, we
// must demand a source element if any DemandedElt maps to it.
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
- APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
@@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// of this vector.
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
- if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
KnownZero.setBit(i);
- if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
KnownUndef.setBit(i);
}
}
@@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
@@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
@@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcElts.isAllOnesValue() ||
- !DemandedSubElts.isAllOnesValue()) {
+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
@@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
@@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
}
break;
}
@@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
} else {
KnownBits Known;
- APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
@@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal.isOneValue();
+ return CVal.isOne();
case ZeroOrNegativeOneBooleanContent:
- return CVal.isAllOnesValue();
+ return CVal.isAllOnes();
}
llvm_unreachable("Invalid boolean contents");
@@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
- return CN->isNullValue();
+ return CN->isZero();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
@@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return N->isAllOnesValue() && SExt;
+ return N->isAllOnes() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
@@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isNullValue())
+ if (YConst && YConst->isZero())
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
@@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
- bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
- (!N1C->isNullValue() && Cond == ISD::SETNE);
+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
+ (!N1C->isZero() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
@@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
- } else if ((N1C->isNullValue() || N1C->isOne()) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ } else if ((N1C->isZero() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
@@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
- if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
@@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
+ N1C && N1C->isAllOnes()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, OpVT),
+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
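
The fold added above relies on the fact that an arithmetic shift right by BW-1 replicates the sign bit across the whole value, so the result is -1 exactly when the operand is negative. A standalone C++ check of that identity (not LLVM code; assumes arithmetic right shift for signed values, which is guaranteed since C++20):

    #include <cassert>
    #include <cstdint>

    // (x >> 31) == -1  <=>  x < 0, which is what lets
    // seteq (ashr X, 31), -1 become setlt X, 0.
    static bool sraIsAllOnes(int32_t x) { return (x >> 31) == -1; }

    int main() {
      for (int64_t v = -100000; v <= 100000; v += 7) {
        int32_t x = static_cast<int32_t>(v);
        assert(sraIsAllOnes(x) == (x < 0));
      }
      return 0;
    }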
@@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
- if (C1.isNullValue())
+ if (C1.isZero())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
@@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isNullValue();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ bool CmpZero = N1C->getAPIntValue().isZero();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
- if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
@@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
@@ -5050,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
@@ -5152,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
const APInt &Divisor = C->getAPIntValue();
- APInt::ms magics = Divisor.magic();
+ SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
- if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
- magics.m = 0;
- magics.s = 0;
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
ShiftMask = 0;
- } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
- } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
- MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
- Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
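
The hunk above only swaps APInt::ms/magic() for SignedDivisionByConstantInfo; the underlying transform is the classic signed division-by-constant via a multiply-high, an optional numerator add, and a shift. A standalone sketch for dividing by 7, using the textbook 32-bit constants (magic 0x92492493, post-shift 2) rather than anything queried from LLVM:

    #include <cassert>
    #include <cstdint>

    // Signed division by 7 without a divide instruction. Assumes arithmetic
    // right shift of signed values (guaranteed since C++20).
    static int32_t sdiv7(int32_t n) {
      const int64_t Magic = -1840700269;           // 0x92492493 as signed 32-bit
      int32_t q = static_cast<int32_t>((Magic * n) >> 32); // multiply-high
      q += n;          // d > 0 and magic < 0: add the numerator
      q >>= 2;         // post-shift
      q += (q < 0) ? 1 : 0; // add the sign bit to round toward zero
      return q;
    }

    int main() {
      for (int32_t n = -1000; n <= 1000; ++n)
        assert(sdiv7(n) == n / 7);
      return 0;
    }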
@@ -5297,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- APInt::mu magics = Divisor.magicu();
+ UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
+ if (magics.IsAdd != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
- magics = Divisor.lshr(PreShift).magicu(PreShift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
}
- APInt Magic = magics.m;
+ APInt Magic = magics.Magic;
unsigned SelNPQ;
- if (magics.a == 0 || Divisor.isOneValue()) {
- assert(magics.s < Divisor.getBitWidth() &&
+ if (magics.IsAdd == 0 || Divisor.isOne()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- PostShift = magics.s;
+ PostShift = magics.ShiftAmount;
SelNPQ = false;
} else {
- PostShift = magics.s - 1;
+ PostShift = magics.ShiftAmount - 1;
SelNPQ = true;
}
@@ -5331,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getNullValue(EltBits),
+ : APInt::getZero(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
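
BuildUDIV follows the unsigned variant of the same scheme; when the magic value needs the extra "add" fixup (magics.IsAdd), the NPQ path computes ((N - Q) >> 1) + Q before the final shift. A standalone sketch for an unsigned divide by 7 with the published 32-bit constants (magic 0x24924925, post-shift 3), again purely illustrative:

    #include <cassert>
    #include <cstdint>

    static uint32_t udiv7(uint32_t n) {
      uint32_t q = static_cast<uint32_t>(
          (static_cast<uint64_t>(n) * 0x24924925u) >> 32); // multiply-high
      uint32_t t = ((n - q) >> 1) + q; // NPQ fixup: avoids a 33-bit magic
      return t >> 2;                   // remaining post-shift (3 - 1)
    }

    int main() {
      uint32_t samples[] = {0u, 1u, 6u, 7u, 8u, 13u, 14u, 1000000u, 0xFFFFFFFFu};
      for (uint32_t n : samples)
        assert(udiv7(n) == n / 7u);
      return 0;
    }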
@@ -5511,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (CDiv->isNullValue())
+ if (CDiv->isZero())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
- ComparingWithAllZeros &= Cmp.isNullValue();
+ ComparingWithAllZeros &= Cmp.isZero();
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
@@ -5529,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// If all lanes are tautological (either all divisors are ones, or divisor
// is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
- if (!Cmp.isNullValue())
+ if (!Cmp.isZero())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5556,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
- APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
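
For reference, the complete urem-equality fold assembled here turns x u% D == 0 into a multiply by the modular inverse of D's odd part, a rotate by D's trailing-zero count, and an unsigned compare against floor(UINT_MAX / D). A hand-worked standalone sketch for D = 6 (the constants below were derived by hand and are only illustrative):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t v, unsigned r) {
      return (v >> r) | (v << ((32 - r) & 31));
    }

    // x % 6 == 0 without a urem: multiply by the inverse of the odd part (3),
    // rotate out the single trailing-zero factor, compare with the largest
    // valid "quotient".
    static bool isMultipleOf6(uint32_t x) {
      const uint32_t P = 0xAAAAAAABu;      // inverse of 3 mod 2^32
      const uint32_t Q = 0xFFFFFFFFu / 6u; // floor((2^32 - 1) / 6)
      return rotr32(x * P, 1) <= Q;        // K = 1 trailing zero of 6
    }

    int main() {
      assert(3u * 0xAAAAAAABu == 1u); // sanity-check the modular inverse
      for (uint32_t x = 0; x < 100000; ++x)
        assert(isMultipleOf6(x) == (x % 6u == 0));
      return 0;
    }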
@@ -5752,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isZero())
return SDValue();
bool HadIntMinDivisor = false;
@@ -5765,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
@@ -5778,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ HadOneDivisor |= D.isOne();
+ AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
@@ -5794,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5802,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
@@ -5818,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
- assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes, those can be set to undef if appropriate.
- if (D.isOneValue()) {
+ if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
P = 0;
A = -1;
@@ -5951,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
- DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
@@ -6777,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
- APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
@@ -6970,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
-bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
+ assert(VT.isVector() && "Expected vector type");
+ unsigned Len = VT.getScalarSizeInBits();
+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
+}
+
+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -6981,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
- return false;
+ return SDValue();
// Only expand vector types if we have the appropriate vector bit operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
+ return SDValue();
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
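
The referenced bit-twiddling method, written out for a plain uint32_t, is what the expansion emits as SelectionDAG nodes:

    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555u);                 // count per bit pair
      v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // count per nibble
      v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // count per byte
      return (v * 0x01010101u) >> 24;                   // sum the byte counts
    }

    int main() {
      assert(popcount32(0) == 0);
      assert(popcount32(0xFFFFFFFFu) == 32);
      assert(popcount32(0x80000001u) == 2);
      for (unsigned i = 0; i < 32; ++i)
        assert(popcount32(1u << i) == 1);
      return 0;
    }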
@@ -7026,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
- Result = Op;
- return true;
+ return Op;
}
-bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7040,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
@@ -7052,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
- !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
// for now, we do this:
// x = x | (x >> 1);
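
The shift/OR cascade keeps doubling the shift amount until every bit below the leading one is set; inverting then leaves only the leading zeros, which a population count turns into the answer. A standalone uint32_t version of the whole sequence:

    #include <cassert>
    #include <cstdint>

    static unsigned ctlz32(uint32_t x) {
      x |= x >> 1;
      x |= x >> 2;
      x |= x >> 4;
      x |= x >> 8;
      x |= x >> 16;     // all bits below the leading one are now set
      x = ~x;           // only the leading zeros remain set
      unsigned count = 0;
      for (; x; x &= x - 1) // popcount of the remainder
        ++count;
      return count;     // ctlz32(0) == 32
    }

    int main() {
      assert(ctlz32(0) == 32);
      assert(ctlz32(1) == 31);
      assert(ctlz32(0x80000000u) == 0);
      assert(ctlz32(0x00010000u) == 15);
      return 0;
    }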
@@ -7079,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
-bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
@@ -7092,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTTZ, VT)) {
- Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTTZ, VT))
+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
@@ -7104,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -7128,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
- Result =
- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
- return true;
+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
}
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
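
The identity behind the CTTZ expansion rewritten above is that ~x & (x - 1) sets exactly one bit per trailing zero of x, so a population count of it yields cttz(x), and 32 for x == 0. A quick standalone check:

    #include <cassert>
    #include <cstdint>

    static unsigned cttz32(uint32_t x) {
      uint32_t mask = ~x & (x - 1); // one set bit per trailing zero of x
      unsigned count = 0;
      for (; mask; mask &= mask - 1)
        ++count;
      return count;
    }

    int main() {
      assert(cttz32(0) == 32);
      assert(cttz32(1) == 0);
      assert(cttz32(0x80000000u) == 31);
      assert(cttz32(40) == 3); // 40 == 0b101000
      return 0;
    }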
-bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG, bool IsNegative) const {
+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7149,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// abs(x) -> umin(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// Only expand vector types if we have the appropriate vector operations.
@@ -7178,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- } else {
- // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
}
- return true;
+
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
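
The final fallback above is the branch-free absolute value: broadcast the sign bit across the word, add, then XOR. A standalone sketch (assumes arithmetic right shift of signed values; INT_MIN is out of scope since its magnitude is not representable):

    #include <cassert>
    #include <cstdint>

    // s is all-ones when x is negative, zero otherwise, so
    // (x + s) ^ s negates x exactly when it is negative.
    static int32_t absViaShift(int32_t x) {
      int32_t s = x >> 31;
      return (x + s) ^ s;
    }

    int main() {
      assert(absViaShift(0) == 0);
      assert(absViaShift(5) == 5);
      assert(absViaShift(-5) == 5);
      assert(absViaShift(-1) == 1);
      return 0;
    }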
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
@@ -7266,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
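
The rewritten sequence reverses a value by byte-swapping first and then swapping nibbles, bit pairs, and single bits; shifting before masking is what lets each step share a single low-half mask. A standalone uint32_t version (using the GCC/Clang builtin __builtin_bswap32 for the byte swap):

    #include <cassert>
    #include <cstdint>

    static uint32_t bitreverse32(uint32_t v) {
      v = __builtin_bswap32(v);                                // swap bytes
      v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4); // swap nibbles
      v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2); // swap bit pairs
      v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1); // swap single bits
      return v;
    }

    int main() {
      assert(bitreverse32(0x00000001u) == 0x80000000u);
      assert(bitreverse32(0x80000000u) == 0x00000001u);
      assert(bitreverse32(0x12345678u) == 0x1E6A2C48u);
      return 0;
    }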
@@ -7803,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
- unsigned NumSubElts) {
- if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
- return Idx;
+ ElementCount SubEC) {
+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+ "Cannot index a scalable vector within a fixed-width vector");
- EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
- if (VecVT.isScalableVector()) {
+ unsigned NumSubElts = SubEC.getKnownMinValue();
+ EVT IdxVT = Idx.getValueType();
+
+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {
// If this is a constant index and we know the value plus the number of the
// elements in the subvector minus one is less than the minimum number of
// elements then it's safe to return Idx.
@@ -7856,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
-
- // Scalable vectors don't need clamping as these are checked at compile time
- if (SubVecVT.isFixedLengthVector()) {
- assert(SubVecVT.getVectorElementType() == EltVT &&
- "Sub-vector must be a fixed vector with matching element type");
- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
- SubVecVT.getVectorNumElements());
- }
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorElementCount());
EVT IdxVT = Index.getValueType();
+ if (SubVecVT.isScalableVector())
+ Index =
+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
@@ -7921,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
+ if (C->isZero() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
@@ -7949,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
- IsScaledIndex = false;
- IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
- }
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
+ return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
return IndexType;
}
@@ -8073,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
- // SatMax -> Overflow && SumDiff < 0
- // SatMin -> Overflow && SumDiff >= 0
+ // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
APInt MinVal = APInt::getSignedMinValue(BitWidth);
- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
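
The new saturation value is computed from the wrapped sum alone: on overflow the wrapped result has the opposite sign of the true result, so shifting it right by BW-1 and XOR-ing with INT_MIN yields INT_MAX or INT_MIN as appropriate, replacing the old compare-and-select. A standalone sketch of a 32-bit signed saturating add (the overflow test is written by hand; assumes two's-complement wraparound of the unsigned add and arithmetic right shift of signed values):

    #include <cassert>
    #include <cstdint>

    static int32_t saddSat(int32_t a, int32_t b) {
      int32_t sum = static_cast<int32_t>(static_cast<uint32_t>(a) +
                                         static_cast<uint32_t>(b)); // wrapped add
      bool overflow = (a < 0) == (b < 0) && (sum < 0) != (a < 0);
      if (!overflow)
        return sum;
      // (sum >> 31) is all-ones for positive overflow (wrapped sum negative)
      // and zero for negative overflow, so the XOR picks INT_MAX or INT_MIN.
      return (sum >> 31) ^ INT32_MIN;
    }

    int main() {
      assert(saddSat(1, 2) == 3);
      assert(saddSat(INT32_MAX, 1) == INT32_MAX);
      assert(saddSat(INT32_MIN, -1) == INT32_MIN);
      return 0;
    }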
@@ -8394,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
- if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ if (isOperationLegal(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
@@ -8447,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
- WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
- VT.getVectorNumElements());
+ WideVT =
+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
SDValue BottomHalf;
SDValue TopHalf;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index c70620fd7532..7f9518e4c075 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -50,7 +50,6 @@ STATISTIC(NumFinished, "Number of splits finished");
STATISTIC(NumSimple, "Number of splits that were simple");
STATISTIC(NumCopies, "Number of copies inserted for splitting");
STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
-STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
//===----------------------------------------------------------------------===//
// Last Insert Point Analysis
@@ -160,7 +159,6 @@ void SplitAnalysis::clear() {
UseBlocks.clear();
ThroughBlocks.clear();
CurLI = nullptr;
- DidRepairRange = false;
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -188,20 +186,7 @@ void SplitAnalysis::analyzeUses() {
UseSlots.end());
// Compute per-live block info.
- if (!calcLiveBlockInfo()) {
- // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
- // I am looking at you, RegisterCoalescer!
- DidRepairRange = true;
- ++NumRepairs;
- LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
- const_cast<LiveIntervals&>(LIS)
- .shrinkToUses(const_cast<LiveInterval*>(CurLI));
- UseBlocks.clear();
- ThroughBlocks.clear();
- bool fixed = calcLiveBlockInfo();
- (void)fixed;
- assert(fixed && "Couldn't fix broken live interval");
- }
+ calcLiveBlockInfo();
LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in "
<< UseBlocks.size() << " blocks, through "
@@ -210,11 +195,11 @@ void SplitAnalysis::analyzeUses() {
/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
/// where CurLI is live.
-bool SplitAnalysis::calcLiveBlockInfo() {
+void SplitAnalysis::calcLiveBlockInfo() {
ThroughBlocks.resize(MF.getNumBlockIDs());
NumThroughBlocks = NumGapBlocks = 0;
if (CurLI->empty())
- return true;
+ return;
LiveInterval::const_iterator LVI = CurLI->begin();
LiveInterval::const_iterator LVE = CurLI->end();
@@ -240,8 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
ThroughBlocks.set(BI.MBB->getNumber());
// The range shouldn't end mid-block if there are no uses. This shouldn't
// happen.
- if (LVI->end < Stop)
- return false;
+ assert(LVI->end >= Stop && "range ends mid block with no uses");
} else {
// This block has uses. Find the first and last uses in the block.
BI.FirstInstr = *UseI;
@@ -312,7 +296,6 @@ bool SplitAnalysis::calcLiveBlockInfo() {
}
assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
- return true;
}
unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
@@ -529,19 +512,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
| getInternalReadRegState(!FirstCopy), SubIdx)
.addReg(FromReg, 0, SubIdx);
- BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
- LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
- DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange &SR) {
- SR.createDeadDef(Def, Allocator);
- },
- Indexes, TRI);
return Def;
}
@@ -549,11 +525,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
MachineInstr *CopyMI =
BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
}
@@ -567,18 +543,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
- SmallVector<unsigned, 8> Indexes;
+ SmallVector<unsigned, 8> SubIndexes;
// Abort if we cannot possibly implement the COPY with the given indexes.
- if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
+ if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))
report_fatal_error("Impossible to implement partial COPY");
SlotIndex Def;
- for (unsigned BestIdx : Indexes) {
+ for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
DestLI, Late, Def);
}
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ DestLI.refineSubRanges(
+ Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
+
return Def;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index fbcffacb49ab..902546fe16d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -160,14 +160,11 @@ private:
/// NumThroughBlocks - Number of live-through blocks.
unsigned NumThroughBlocks;
- /// DidRepairRange - analyze was forced to shrinkToUses().
- bool DidRepairRange;
-
  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
/// calcLiveBlockInfo - Compute per-block information about CurLI.
- bool calcLiveBlockInfo();
+ void calcLiveBlockInfo();
public:
SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
@@ -177,11 +174,6 @@ public:
/// split.
void analyze(const LiveInterval *li);
- /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
- /// by analyze(). This really shouldn't happen, but sometimes the coalescer
- /// can create live ranges that end in mid-air.
- bool didRepairRange() const { return DidRepairRange; }
-
/// clear - clear all data structures so SplitAnalysis is ready to analyze a
/// new interval.
void clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index 162f3aab024d..623d5da9831e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -687,6 +687,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// Walk the instructions in the block to look for start/end ops.
for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugInstr())
+ continue;
if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) {
int Slot = getStartOrEndSlot(MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 9f229d51b985..7445f77c955d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -148,10 +148,8 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return false;
bool NeedsProtector = false;
- for (StructType::element_iterator I = ST->element_begin(),
- E = ST->element_end();
- I != E; ++I)
- if (ContainsProtectableArray(*I, IsLarge, Strong, true)) {
+ for (Type *ET : ST->elements())
+ if (ContainsProtectableArray(ET, IsLarge, Strong, true)) {
// If the element is a protectable array and is large (>= SSPBufferSize)
// then we are done. If the protectable array is not large, then
// keep looking in case a subsequent element is a large array.
@@ -436,13 +434,11 @@ bool StackProtector::InsertStackProtectors() {
// protection in SDAG.
bool SupportsSelectionDAGSP =
TLI->useStackGuardXorFP() ||
- (EnableSelectionDAGSP && !TM->Options.EnableFastISel &&
- !TM->Options.EnableGlobalISel);
- AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
- for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
- BasicBlock *BB = &*I++;
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ for (BasicBlock &BB : llvm::make_early_inc_range(*F)) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
if (!RI)
continue;
@@ -530,23 +526,23 @@ bool StackProtector::InsertStackProtectors() {
// Split the basic block before the return instruction.
BasicBlock *NewBB =
- BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
+ BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
- if (DT && DT->isReachableFromEntry(BB)) {
- DT->addNewBlock(NewBB, BB);
- DT->addNewBlock(FailBB, BB);
+ if (DT && DT->isReachableFromEntry(&BB)) {
+ DT->addNewBlock(NewBB, &BB);
+ DT->addNewBlock(FailBB, &BB);
}
// Remove default branch instruction to the new BB.
- BB->getTerminator()->eraseFromParent();
+ BB.getTerminator()->eraseFromParent();
// Move the newly created basic block to the point right after the old
// basic block so that it's in the "fall through" position.
- NewBB->moveAfter(BB);
+ NewBB->moveAfter(&BB);
// Generate the stack protector instructions in the old basic block.
- IRBuilder<> B(BB);
+ IRBuilder<> B(&BB);
Value *Guard = getStackGuard(TLI, M, B);
LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
Value *Cmp = B.CreateICmpEQ(Guard, LI2);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index ebe00bd7402f..9aea5a7a8853 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -169,7 +169,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
if (!LS->hasInterval(FI))
continue;
LiveInterval &li = LS->getInterval(FI);
- if (!MI.isDebugValue())
+ if (!MI.isDebugInstr())
li.incrementWeight(
LiveIntervals::getSpillWeight(false, true, MBFI, MI));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index dfcec32d9537..36a02d5beb4b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -405,7 +405,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
// Optimize the case where all the case values fit in a word without having
// to subtract minValue. In this case, we can optimize away the subtraction.
- LowBound = APInt::getNullValue(Low.getBitWidth());
+ LowBound = APInt::getZero(Low.getBitWidth());
CmpRange = High;
ContiguousRange = false;
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index af735f2a0216..943bd18c6c8b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
+static cl::opt<unsigned> TailDupJmpTableLoopSize(
+ "tail-dup-jmptable-loop-size",
+ cl::desc("Maximum loop latches to consider tail duplication that are "
+ "successors of loop header."),
+ cl::init(128), cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -100,12 +106,11 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
}
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
- MBB->pred_end());
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != MBB->end()) {
+ for (MachineBasicBlock &MBB : llvm::drop_begin(MF)) {
+ SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(),
+ MBB.pred_end());
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (MI != MBB.end()) {
if (!MI->isPHI())
break;
for (MachineBasicBlock *PredBB : Preds) {
@@ -118,7 +123,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
if (!Found) {
- dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
<< *MI;
dbgs() << " missing input from predecessor "
<< printMBBReference(*PredBB) << '\n';
@@ -129,14 +134,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
if (CheckExtra && !Preds.count(PHIBB)) {
- dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB)
+ dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB)
<< ": " << *MI;
dbgs() << " extra input from predecessor "
<< printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
}
if (PHIBB->getNumber() < 0) {
- dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
<< *MI;
dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
@@ -279,18 +284,17 @@ bool TailDuplicator::tailDuplicateBlocks() {
VerifyPHIs(*MF, true);
}
- for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
- MachineBasicBlock *MBB = &*I++;
-
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(*MF))) {
if (NumTails == TailDupLimit)
break;
- bool IsSimple = isSimpleBB(MBB);
+ bool IsSimple = isSimpleBB(&MBB);
- if (!shouldTailDuplicate(IsSimple, *MBB))
+ if (!shouldTailDuplicate(IsSimple, MBB))
continue;
- MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr);
}
if (PreRegAlloc && TailDupVerify)
@@ -565,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
+ // When doing tail-duplication with jumptable loops like:
+ // 1 -> 2 <-> 3 |
+ // \ <-> 4 |
+ // \ <-> 5 |
+ // \ <-> ... |
+ // \---> rest |
+  // a quadratic number of edges and many more loops are added to the CFG.
+  // This may cause a compile-time regression when the jump table is quite
+  // large, so set a limit on the number of jump-table cases.
+ auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
+ const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
+ TailBB.pred_end());
+    // Check whether the basic block has a large number of successors, each of
+    // which has only one successor: the basic block itself.
+ return llvm::count_if(
+ TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
+ return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
+ }) > TailDupJmpTableLoopSize;
+ };
+
+ if (isLargeJumpTableLoop(TailBB))
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
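
A rough standalone model of the guard added above, on a toy adjacency-list CFG (the Block type and block numbering here are made up for illustration): count the successors of a candidate block whose only successor is the block itself, and give up once that count exceeds the limit.

    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    struct Block {
      std::vector<int> preds;
      std::vector<int> succs;
    };

    static bool isLargeJumpTableLoop(const std::vector<Block> &cfg, int header,
                                     std::size_t limit) {
      std::unordered_set<int> preds(cfg[header].preds.begin(),
                                    cfg[header].preds.end());
      std::size_t latches = 0;
      for (int s : cfg[header].succs)
        if (preds.count(s) && cfg[s].succs.size() == 1)
          ++latches; // a successor that loops straight back to the header
      return latches > limit;
    }

    int main() {
      // Header 0 with three single-successor latches (1, 2, 3) looping back.
      std::vector<Block> cfg(4);
      cfg[0] = {{1, 2, 3}, {1, 2, 3}};
      cfg[1] = {{0}, {0}};
      cfg[2] = {{0}, {0}};
      cfg[3] = {{0}, {0}};
      return isLargeJumpTableLoop(cfg, 0, 2) ? 0 : 1;
    }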
@@ -874,18 +901,15 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<Register, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
- I != E; /* empty */) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isPHI()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(*TailBB)) {
+ if (MI.isPHI()) {
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
- processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
} else {
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
- duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
+ duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
}
}
appendCopies(PredBB, CopyInfos, Copies);
@@ -930,44 +954,56 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// There may be a branch to the layout successor. This is unlikely but it
// happens. The correct thing to do is to remove the branch before
// duplicating the instructions in all cases.
- TII->removeBranch(*PrevBB);
- if (PreRegAlloc) {
- DenseMap<Register, RegSubRegPair> LocalVRMap;
- SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- // Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
- // Replace the uses of the def of the PHI with the register coming
- // from PredBB.
- MachineInstr *MI = &*I++;
- processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
- }
+ bool RemovedBranches = TII->removeBranch(*PrevBB) != 0;
+
+  // Only merge if the predecessor has no terminators left; otherwise abort.
+ if (PrevBB->getFirstTerminator() == PrevBB->end()) {
+ if (PreRegAlloc) {
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
+ true);
+ }
- // Now copy the non-PHI instructions.
- while (I != TailBB->end()) {
- // Replace def of virtual registers with new registers, and update
- // uses with PHI source register or the new registers.
- MachineInstr *MI = &*I++;
- assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
- duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
- MI->eraseFromParent();
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ appendCopies(PrevBB, CopyInfos, Copies);
+ } else {
+ TII->removeBranch(*PrevBB);
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
}
- appendCopies(PrevBB, CopyInfos, Copies);
- } else {
- TII->removeBranch(*PrevBB);
- // No PHIs to worry about, just splice the instructions over.
- PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
- }
- PrevBB->removeSuccessor(PrevBB->succ_begin());
- assert(PrevBB->succ_empty());
- PrevBB->transferSuccessors(TailBB);
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
- // Update branches in PrevBB based on Tail's layout successor.
- if (ShouldUpdateTerminators)
- PrevBB->updateTerminator(TailBB->getNextNode());
+ // Update branches in PrevBB based on Tail's layout successor.
+ if (ShouldUpdateTerminators)
+ PrevBB->updateTerminator(TailBB->getNextNode());
- TDBBs.push_back(PrevBB);
- Changed = true;
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
+ "contains terminator instructions");
+ // Return early if no changes were made
+ if (!Changed)
+ return RemovedBranches;
+ }
+ Changed |= RemovedBranches;
}
// If this is after register allocation, there are no phis to fix.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2e4a656ea0c8..e74b3195a130 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -366,7 +366,7 @@ bool TargetInstrInfo::hasLoadFromStackSlot(
oe = MI.memoperands_end();
o != oe; ++o) {
if ((*o)->isLoad() &&
- dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
Accesses.push_back(*o);
}
return Accesses.size() != StartSize;
@@ -380,7 +380,7 @@ bool TargetInstrInfo::hasStoreToStackSlot(
oe = MI.memoperands_end();
o != oe; ++o) {
if ((*o)->isStore() &&
- dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
Accesses.push_back(*o);
}
return Accesses.size() != StartSize;
@@ -1264,22 +1264,6 @@ int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-/// If we can determine the operand latency from the def only, without itinerary
-/// lookup, do so. Otherwise return -1.
-int TargetInstrInfo::computeDefOperandLatency(
- const InstrItineraryData *ItinData, const MachineInstr &DefMI) const {
-
- // Let the target hook getInstrLatency handle missing itineraries.
- if (!ItinData)
- return getInstrLatency(ItinData, DefMI);
-
- if(ItinData->isEmpty())
- return defaultDefLatency(ItinData->SchedModel, DefMI);
-
- // ...operand lookup required
- return -1;
-}
-
bool TargetInstrInfo::getRegSequenceInputs(
const MachineInstr &MI, unsigned DefIdx,
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f3e0cc7c1f2a..c4043dcf0765 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -52,6 +52,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -236,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
return FPEXT_F16_F32;
if (RetVT == MVT::f64)
return FPEXT_F16_F64;
+ if (RetVT == MVT::f80)
+ return FPEXT_F16_F80;
if (RetVT == MVT::f128)
return FPEXT_F16_F128;
} else if (OpVT == MVT::f32) {
@@ -659,7 +662,7 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
- memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
@@ -896,8 +899,6 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::FROUND, VT, Expand);
- setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::LROUND, VT, Expand);
setOperationAction(ISD::LLROUND, VT, Expand);
setOperationAction(ISD::LRINT, VT, Expand);
@@ -924,8 +925,15 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy)
- : getPointerTy(DL);
+ MVT ShiftVT =
+ LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL);
+  // If any possible shift value won't fit in the preferred type, just use
+ // something safe. Assume it will be legalized when the shift is expanded.
+ if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
+ ShiftVT = MVT::i32;
+ assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
+ "ShiftVT is still too small!");
+ return ShiftVT;
}
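
The new check compares the preferred shift-amount width against Log2_32_Ceil of the value width, i.e. the minimum number of bits needed to encode any legal shift amount. A standalone version of that calculation:

    #include <cassert>

    static unsigned log2Ceil(unsigned n) {
      unsigned bits = 0;
      while ((1u << bits) < n)
        ++bits;
      return bits;
    }

    int main() {
      assert(log2Ceil(8) == 3);   // i8 shifts need 3 bits (0..7)
      assert(log2Ceil(64) == 6);  // i64 shifts need 6 bits (0..63)
      assert(log2Ceil(256) == 8); // i256 shifts need 8 bits (0..255)
      // An 8-bit shift-amount type is wide enough for i256, but anything
      // narrower than log2Ceil(bit width) triggers the i32 fallback above.
      return 0;
    }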
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
@@ -1556,7 +1564,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
// Scalable vectors cannot be scalarized, so handle the legalisation of the
// types like done elsewhere in SelectionDAG.
- if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) {
+ if (EltCnt.isScalable()) {
LegalizeKind LK;
EVT PartVT = VT;
do {
@@ -1565,16 +1573,14 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
PartVT = LK.second;
} while (LK.first != TypeLegal);
- NumIntermediates = VT.getVectorElementCount().getKnownMinValue() /
- PartVT.getVectorElementCount().getKnownMinValue();
+ if (!PartVT.isVector()) {
+ report_fatal_error(
+ "Don't know how to legalize this scalable vector type");
+ }
- // FIXME: This code needs to be extended to handle more complex vector
- // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
- // supported cases are vectors that are broken down into equal parts
- // such as nxv6i64 -> 3 x nxv2i64.
- assert((PartVT.getVectorElementCount() * NumIntermediates) ==
- VT.getVectorElementCount() &&
- "Expected an integer multiple of PartVT");
+ NumIntermediates =
+ divideCeil(VT.getVectorElementCount().getKnownMinValue(),
+ PartVT.getVectorElementCount().getKnownMinValue());
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
return NumIntermediates;
@@ -1657,9 +1663,9 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (attr.hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1679,13 +1685,13 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
+ if (attr.hasRetAttr(Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (attr.hasRetAttr(Attribute::SExt))
Flags.setSExt();
- else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
@@ -1696,7 +1702,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
+uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
return DL.getABITypeAlign(Ty).value();
}
@@ -1749,8 +1755,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
bool *Fast) const {
- return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(),
- MMO.getAlign(), MMO.getFlags(), Fast);
+ EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
+ MMO.getFlags(), Fast);
}
//===----------------------------------------------------------------------===//
@@ -1849,8 +1856,12 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
while (true) {
LegalizeKind LK = getTypeConversion(C, MTy);
- if (LK.first == TypeScalarizeScalableVector)
- return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty));
+ if (LK.first == TypeScalarizeScalableVector) {
+ // Ensure we return a sensible simple VT here, since many callers of this
+ // function require it.
+ MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
+ return std::make_pair(InstructionCost::getInvalid(), VT);
+ }
if (LK.first == TypeLegal)
return std::make_pair(Cost, MTy.getSimpleVT());
@@ -1980,9 +1991,11 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
GlobalVariable::ExternalLinkage, nullptr,
"__stack_chk_guard");
+
+  // FreeBSD has "__stack_chk_guard" defined externally in libc.so
if (TM.getRelocationModel() == Reloc::Static &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
- !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()))
+ !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()))
GV->setDSOLocal(true);
}
}
@@ -2021,6 +2034,12 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}
+Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
+ if (TM.Options.LoopAlignment)
+ return Align(TM.Options.LoopAlignment);
+ return PrefLoopAlignment;
+}
+
//===----------------------------------------------------------------------===//
// Reciprocal Estimates
//===----------------------------------------------------------------------===//
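Note on the InitCmpLibcallCCs hunk above: memset writes its fill value byte by byte, so it only initializes an array of multi-byte enum values correctly when the byte pattern happens to repeat, whereas std::fill assigns whole elements. A minimal standalone sketch of the difference (illustrative only, not part of the patch; enum values chosen for the example, int assumed to be 4 bytes):

#include <algorithm>
#include <cstdio>
#include <cstring>

enum CondCode : int { SETEQ = 0, SETCC_INVALID = 2 };

int main() {
  CondCode A[4];
  // memset stores the value byte-by-byte: each 4-byte element becomes
  // 0x02020202, not SETCC_INVALID (2).
  std::memset(A, SETCC_INVALID, sizeof(A));
  std::printf("after memset: %d\n", static_cast<int>(A[0]));

  // std::fill assigns whole elements, so every entry really is SETCC_INVALID.
  std::fill(A, A + 4, SETCC_INVALID);
  std::printf("after fill:   %d\n", static_cast<int>(A[0]));
  return 0;
}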
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index de096f95afcb..1d3bb286c882 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1495,7 +1495,7 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix(
SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
bool CannotUsePrivateLabel = true;
- if (auto *GO = GV->getBaseObject()) {
+ if (auto *GO = GV->getAliaseeObject()) {
SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM);
const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);
CannotUsePrivateLabel =
@@ -1566,7 +1566,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
if (const Comdat *C = GV->getComdat()) {
const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
- ComdatKey = GA->getBaseObject();
+ ComdatKey = GA->getAliaseeObject();
if (ComdatKey == GV) {
switch (C->getSelectionKind()) {
case Comdat::Any:
@@ -1945,7 +1945,7 @@ static std::string APIntToHexString(const APInt &AI) {
static std::string scalarConstantToHexString(const Constant *C) {
Type *Ty = C->getType();
if (isa<UndefValue>(C)) {
- return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+ return APIntToHexString(APInt::getZero(Ty->getPrimitiveSizeInBits()));
} else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
} else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
@@ -2417,7 +2417,20 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
Align &Alignment) const {
- //TODO: Enable emiting constant pool to unique sections when we support it.
+  // TODO: Enable emitting the constant pool to unique sections when we support it.
+ if (Alignment > Align(16))
+ report_fatal_error("Alignments greater than 16 not yet supported.");
+
+ if (Alignment == Align(8)) {
+ assert(ReadOnly8Section && "Section should always be initialized.");
+ return ReadOnly8Section;
+ }
+
+ if (Alignment == Align(16)) {
+ assert(ReadOnly16Section && "Section should always be initialized.");
+ return ReadOnly16Section;
+ }
+
return ReadOnlySection;
}
@@ -2446,7 +2459,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
- report_fatal_error("XCOFF not yet implemented.");
+  /* Not implemented yet, but don't crash; return nullptr. */
+ return nullptr;
}
XCOFF::StorageClass
@@ -2476,12 +2490,12 @@ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {
MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
const GlobalValue *Func, const TargetMachine &TM) const {
- assert(
- (isa<Function>(Func) ||
- (isa<GlobalAlias>(Func) &&
- isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) &&
- "Func must be a function or an alias which has a function as base "
- "object.");
+ assert((isa<Function>(Func) ||
+ (isa<GlobalAlias>(Func) &&
+ isa_and_nonnull<Function>(
+ cast<GlobalAlias>(Func)->getAliaseeObject()))) &&
+ "Func must be a function or an alias which has a function as base "
+ "object.");
SmallString<128> NameStr;
NameStr.push_back('.');
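The XCOFF getSectionForConstant hunk above routes constants to alignment-specific read-only sections (8- and 16-byte aligned) and rejects anything over 16. A rough standalone sketch of that dispatch; pickReadOnlySection and the section names are placeholders for illustration, not LLVM API (the real code returns MCSection pointers such as ReadOnly8Section and ReadOnly16Section):

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>

std::string pickReadOnlySection(uint64_t Alignment) {
  if (Alignment > 16) {
    std::fprintf(stderr, "Alignments greater than 16 not yet supported.\n");
    std::abort();                // stands in for report_fatal_error
  }
  if (Alignment == 16)
    return ".rodata.align16";    // stands in for ReadOnly16Section
  if (Alignment == 8)
    return ".rodata.align8";     // stands in for ReadOnly8Section
  return ".rodata";              // default ReadOnlySection
}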
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4024fd452fc4..402e21d3708b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -172,6 +172,24 @@ static cl::opt<bool>
FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
cl::desc("Do not insert FS-AFDO discriminators before "
"emit."));
+// Disable MIRProfileLoader before RegAlloc. This is for debugging and
+// tuning purposes.
+static cl::opt<bool> DisableRAFSProfileLoader(
+ "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before RegAlloc"));
+// Disable MIRProfileLoader before BlockPlacement. This is for debugging
+// and tuning purposes.
+static cl::opt<bool> DisableLayoutFSProfileLoader(
+ "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before BlockPlacement"));
+// Specify FSProfile file name.
+static cl::opt<std::string>
+ FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile file name."), cl::Hidden);
+// Specify Remapping file for FSProfile.
+static cl::opt<std::string> FSRemappingFile(
+ "fs-remapping-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
return TargetID;
}
+// Find the FSProfile file name. The internal option takes precedence over
+// the value from TargetMachine.
+static const std::string getFSProfileFile(const TargetMachine *TM) {
+ if (!FSProfileFile.empty())
+ return FSProfileFile.getValue();
+ const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileFile;
+}
+
+// Find the Profile remapping file name. The internal option takes precedence
+// over the value from TargetMachine.
+static const std::string getFSRemappingFile(const TargetMachine *TM) {
+ if (!FSRemappingFile.empty())
+ return FSRemappingFile.getValue();
+ const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileRemappingFile;
+}
+
//===---------------------------------------------------------------------===//
/// TargetPassConfig
//===---------------------------------------------------------------------===//
@@ -321,12 +361,9 @@ namespace {
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
- bool VerifyAfter;
- InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter)
- : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
- VerifyAfter(VerifyAfter) {}
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID) {}
Pass *getInsertedPass() const {
assert(InsertedPassID.isValid() && "Illegal Pass ID!");
@@ -601,14 +638,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter) {
+ IdentifyingPassPtr InsertedPassID) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -686,7 +722,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
/// a later pass or that it should stop after an earlier pass, then do not add
/// the pass. Finally, compare the current pass against the StartAfter
/// and StopAfter options and change the Started/Stopped flags accordingly.
-void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
+void TargetPassConfig::addPass(Pass *P) {
assert(!Initialized && "PassConfig is immutable");
// Cache the Pass ID here in case the pass manager finds this pass is
@@ -704,16 +740,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
addMachinePrePasses();
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses && verifyAfter)
+ if (AddingMachinePasses)
Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
if (AddingMachinePasses)
- addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter);
+ addMachinePostPasses(Banner);
// Add the passes after the pass P if there is any.
for (const auto &IP : Impl->InsertedPasses) {
if (IP.TargetPassID == PassID)
- addPass(IP.getInsertedPass(), IP.VerifyAfter);
+ addPass(IP.getInsertedPass());
}
} else {
delete P;
@@ -733,7 +769,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
///
/// addPass cannot return a pointer to the pass instance because is internal the
/// PassManager and the instance we create here may already be freed.
-AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
@@ -748,7 +784,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
llvm_unreachable("Pass ID not registered");
}
AnalysisID FinalID = P->getPassID();
- addPass(P, verifyAfter); // Ends the lifetime of P.
+ addPass(P); // Ends the lifetime of P.
return FinalID;
}
@@ -792,8 +828,7 @@ void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
addDebugifyPass();
}
-void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
- bool AllowVerify, bool AllowStrip) {
+void TargetPassConfig::addMachinePostPasses(const std::string &Banner) {
if (DebugifyIsSafe) {
if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {
addCheckDebugPass();
@@ -801,8 +836,7 @@ void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
} else if (DebugifyAndStripAll == cl::BOU_TRUE)
addStripDebugPass();
}
- if (AllowVerify)
- addVerifyPass(Banner);
+ addVerifyPass(Banner);
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -1113,6 +1147,18 @@ void TargetPassConfig::addMachinePasses() {
// where it becomes safe again so stop debugifying here.
DebugifyIsSafe = false;
+  // Add an FSDiscriminator pass right before RA, so that we can get a
+  // more precise SampleFDO profile for RA.
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass1));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
+ addPass(
+ createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass1));
+ }
+
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
@@ -1123,7 +1169,7 @@ void TargetPassConfig::addMachinePasses() {
// Run post-ra passes.
addPostRegAlloc();
- addPass(&RemoveRedundantDebugValuesID, false);
+ addPass(&RemoveRedundantDebugValuesID);
addPass(&FixupStatepointCallerSavedID);
@@ -1165,7 +1211,7 @@ void TargetPassConfig::addMachinePasses() {
// GC
if (addGCPasses()) {
if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()), false);
+ addPass(createGCInfoPrinter(dbgs()));
}
// Basic block placement.
@@ -1195,10 +1241,10 @@ void TargetPassConfig::addMachinePasses() {
// FIXME: Some backends are incompatible with running the verifier after
// addPreEmitPass. Maybe only pass "false" here for those targets?
- addPass(&FuncletLayoutID, false);
+ addPass(&FuncletLayoutID);
- addPass(&StackMapLivenessID, false);
- addPass(&LiveDebugValuesID, false);
+ addPass(&StackMapLivenessID);
+ addPass(&LiveDebugValuesID);
if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
EnableMachineOutliner != RunOutliner::NeverOutline) {
@@ -1224,10 +1270,6 @@ void TargetPassConfig::addMachinePasses() {
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
- // Insert pseudo probe annotation for callsite profiling
- if (TM->Options.PseudoProbeForProfiling)
- addPass(createPseudoProbeInserter());
-
AddingMachinePasses = false;
}
@@ -1369,8 +1411,8 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc() {
- addPass(&PHIEliminationID, false);
- addPass(&TwoAddressInstructionPassID, false);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
addRegAssignAndRewriteFast();
}
@@ -1379,9 +1421,9 @@ void TargetPassConfig::addFastRegAlloc() {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc() {
- addPass(&DetectDeadLanesID, false);
+ addPass(&DetectDeadLanesID);
- addPass(&ProcessImplicitDefsID, false);
+ addPass(&ProcessImplicitDefsID);
// LiveVariables currently requires pure SSA form.
//
@@ -1393,18 +1435,18 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// When LiveVariables is removed this has to be removed/moved either.
// Explicit addition of UnreachableMachineBlockElim allows stopping before or
// after it with -stop-before/-stop-after.
- addPass(&UnreachableMachineBlockElimID, false);
- addPass(&LiveVariablesID, false);
+ addPass(&UnreachableMachineBlockElimID);
+ addPass(&LiveVariablesID);
// Edge splitting is smarter with machine loop info.
- addPass(&MachineLoopInfoID, false);
- addPass(&PHIEliminationID, false);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
// Eventually, we want to run LiveIntervals before PHI elimination.
if (EarlyLiveIntervals)
- addPass(&LiveIntervalsID, false);
+ addPass(&LiveIntervalsID);
- addPass(&TwoAddressInstructionPassID, false);
+ addPass(&TwoAddressInstructionPassID);
addPass(&RegisterCoalescerID);
// The machine scheduler may accidentally create disconnected components
@@ -1417,9 +1459,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {
if (addRegAssignAndRewriteOptimized()) {
// Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
addPass(&StackSlotColoringID);
// Allow targets to expand pseudo instructions depending on the choice of
@@ -1459,12 +1498,21 @@ void TargetPassConfig::addMachineLateOptimization() {
/// Add standard GC passes.
bool TargetPassConfig::addGCPasses() {
- addPass(&GCMachineCodeAnalysisID, false);
+ addPass(&GCMachineCodeAnalysisID);
return true;
}
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass2));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
+ addPass(
+ createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass2));
+ }
if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
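getFSProfileFile/getFSRemappingFile above resolve the flow-sensitive profile path: an explicit -fs-profile-file / -fs-remapping-file value wins, otherwise the path comes from the TargetMachine's PGO options, and only when sample-based PGO is in use. A simplified sketch of that precedence rule with standard-library types (the PGOOptions struct and resolveProfileFile here are stand-ins, not the LLVM definitions):

#include <optional>
#include <string>

struct PGOOptions {
  enum Action { NoAction, SampleUse } Act = NoAction;
  std::string ProfileFile;
};

std::string resolveProfileFile(const std::string &CLIValue,
                               const std::optional<PGOOptions> &Opt) {
  if (!CLIValue.empty())
    return CLIValue;                      // the command-line option takes precedence
  if (!Opt || Opt->Act != PGOOptions::SampleUse)
    return std::string();                 // nothing usable configured
  return Opt->ProfileFile;                // fall back to the PGO configuration
}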
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1664b4dadfec..46cec5407565 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// registers. e.g. r1 = move v1024.
DenseMap<Register, Register> DstRegMap;
+ void removeClobberedSrcRegMap(MachineInstr *MI);
+
bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
@@ -132,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi, Register RegA,
- Register RegB, unsigned Dist);
+ Register RegB, unsigned &Dist);
bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
@@ -144,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist, bool shouldOnlyCommute);
+ unsigned &Dist, bool shouldOnlyCommute);
bool tryInstructionCommute(MachineInstr *MI,
unsigned DstOpIdx,
@@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
return nullptr;
- MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
+ MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
+ MachineInstr &UseMI = *UseOp.getParent();
if (UseMI.getParent() != MBB)
return nullptr;
Register SrcReg;
@@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
+ if (UseMI.isCommutable()) {
+ unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ unsigned Src2 = UseMI.getOperandNo(&UseOp);
+ if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
+ MachineOperand &MO = UseMI.getOperand(Src1);
+ if (MO.isReg() && MO.isUse() &&
+ isTwoAddrUse(UseMI, MO.getReg(), DstReg)) {
+ IsDstPhys = DstReg.isPhysical();
+ return &UseMI;
+ }
+ }
+ }
return nullptr;
}
@@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB,
return TRI->regsOverlap(RegA, RegB);
}
+/// Remove from RegMap any entries mapped to a physical register that overlaps MO.
+static void removeMapRegEntry(const MachineOperand &MO,
+ DenseMap<Register, Register> &RegMap,
+ const TargetRegisterInfo *TRI) {
+ assert(
+ (MO.isReg() || MO.isRegMask()) &&
+ "removeMapRegEntry must be called with a register or regmask operand.");
+
+ SmallVector<Register, 2> Srcs;
+ for (auto SI : RegMap) {
+ Register ToReg = SI.second;
+ if (ToReg.isVirtual())
+ continue;
+
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (TRI->regsOverlap(ToReg, Reg))
+ Srcs.push_back(SI.first);
+ } else if (MO.clobbersPhysReg(ToReg))
+ Srcs.push_back(SI.first);
+ }
+
+ for (auto SrcReg : Srcs)
+ RegMap.erase(SrcReg);
+}
+
+/// If a physical register is clobbered, old entries mapped to it should be
+/// deleted. For example
+///
+/// %2:gr64 = COPY killed $rdx
+/// MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx
+///
+/// After the MUL instruction, $rdx contains a different value than in the COPY
+/// instruction. So %2 should not map to $rdx after MUL.
+void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ // If a virtual register is copied to its mapped physical register, it
+ // doesn't change the potential coalescing between them, so we don't remove
+ // entries mapped to the physical register. For example
+ //
+ // %100 = COPY $r8
+ // ...
+ // $r8 = COPY %100
+ //
+ // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't
+ // destroy the content of $r8, and should not impact SrcRegMap.
+ Register Dst = MI->getOperand(0).getReg();
+ if (!Dst || Dst.isVirtual())
+ return;
+
+ Register Src = MI->getOperand(1).getReg();
+ if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI))
+ return;
+ }
+
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg || Reg.isVirtual())
+ continue;
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ }
+}
+
// Returns true if Reg is equal or aliased to at least one register in Set.
static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
const TargetRegisterInfo *TRI) {
@@ -589,21 +674,15 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
/// Return true if this transformation was successful.
bool TwoAddressInstructionPass::convertInstTo3Addr(
MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
- Register RegA, Register RegB, unsigned Dist) {
- // FIXME: Why does convertToThreeAddress() need an iterator reference?
- MachineFunction::iterator MFI = MBB->getIterator();
- MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
- assert(MBB->getIterator() == MFI &&
- "convertToThreeAddress changed iterator reference");
+ Register RegA, Register RegB, unsigned &Dist) {
+ MachineInstrSpan MIS(mi, MBB);
+ MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV, LIS);
if (!NewMI)
return false;
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
-
// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
// Sanity check.
@@ -624,7 +703,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
MBB->erase(mi); // Nuke the old inst.
- DistanceMap.insert(std::make_pair(NewMI, Dist));
+ for (MachineInstr &MI : MIS)
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ Dist--;
mi = NewMI;
nmi = std::next(mi);
@@ -656,9 +737,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
VirtRegPairs.push_back(NewReg);
break;
}
- bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
- if (!isNew)
- assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ SrcRegMap[NewReg] = Reg;
VirtRegPairs.push_back(NewReg);
Reg = NewReg;
}
@@ -667,8 +746,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
unsigned ToReg = VirtRegPairs.back();
VirtRegPairs.pop_back();
while (!VirtRegPairs.empty()) {
- unsigned FromReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
+ unsigned FromReg = VirtRegPairs.pop_back_val();
bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
if (!isNew)
assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
@@ -857,12 +935,13 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
nmi = End;
MachineBasicBlock::iterator InsertPos = KillPos;
if (LIS) {
- // We have to move the copies first so that the MBB is still well-formed
- // when calling handleMove().
+ // We have to move the copies (and any interleaved debug instructions)
+ // first so that the MBB is still well-formed when calling handleMove().
for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
auto CopyMI = MBBI++;
MBB->splice(InsertPos, MBB, CopyMI);
- LIS->handleMove(*CopyMI);
+ if (!CopyMI->isDebugOrPseudoInstr())
+ LIS->handleMove(*CopyMI);
InsertPos = CopyMI;
}
End = std::next(MachineBasicBlock::iterator(MI));
@@ -1130,7 +1209,7 @@ bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist, bool shouldOnlyCommute) {
+ unsigned &Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1238,6 +1317,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// look "normal" to the transformation logic.
MBB->insert(mi, NewMIs[0]);
MBB->insert(mi, NewMIs[1]);
+ DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+ DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
<< "2addr: NEW INST: " << *NewMIs[1]);
@@ -1288,9 +1369,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (MO.isReg())
OrigRegs.push_back(MO.getReg());
}
+
+ LIS->RemoveMachineInstrFromMaps(MI);
}
MI.eraseFromParent();
+ DistanceMap.erase(&MI);
// Update LiveIntervals.
if (LIS) {
@@ -1307,6 +1391,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
+ DistanceMap.erase(NewMIs[0]);
+ DistanceMap.erase(NewMIs[1]);
+ Dist--;
}
}
}
@@ -1320,7 +1407,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Return true if any tied operands where found, including the trivial ones.
bool TwoAddressInstructionPass::
collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
- const MCInstrDesc &MCID = MI->getDesc();
bool AnyOps = false;
unsigned NumOps = MI->getNumOperands();
@@ -1342,10 +1428,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (DstReg.isVirtual())
- if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
- TRI, *MF))
- MRI->constrainRegClass(DstReg, RC);
+ if (DstReg.isVirtual()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ MRI->constrainRegClass(DstReg, RC);
+ }
SrcMO.setReg(DstReg);
SrcMO.setSubReg(0);
LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
@@ -1434,12 +1520,24 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
if (RegA.isVirtual()) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
- SlotIndex endIdx =
- LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
- LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
+ LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ for (auto &S : LI.subranges()) {
+ VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ } else {
+ for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) {
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) {
+ VNInfo *VNI =
+ LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ }
}
}
@@ -1461,49 +1559,58 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);
-
- // Propagate SrcRegMap.
- SrcRegMap[RegA] = RegB;
}
if (AllUsesCopied) {
- bool ReplacedAllUntiedUses = true;
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
- if (MO.getSubReg() == SubRegB) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- MO.setSubReg(0);
- } else {
- ReplacedAllUntiedUses = false;
+ LaneBitmask RemainingUses = LaneBitmask::getNone();
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
}
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ RemainingUses |= TRI->getSubRegIndexLaneMask(MO.getSubReg());
}
}
}
// Update live variables for regB.
- if (RemovedKillFlag && ReplacedAllUntiedUses &&
- LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ if (RemovedKillFlag && RemainingUses.none() && LV &&
+ LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
LV->addVirtualRegisterKilled(RegB, *PrevMI);
}
+ if (RemovedKillFlag && RemainingUses.none())
+ SrcRegMap[LastCopiedReg] = RegB;
+
// Update LiveIntervals.
if (LIS) {
- LiveInterval &LI = LIS->getInterval(RegB);
- SlotIndex MIIdx = LIS->getInstructionIndex(*MI);
- LiveInterval::const_iterator I = LI.find(MIIdx);
- assert(I != LI.end() && "RegB must be live-in to use.");
+ SlotIndex UseIdx = LIS->getInstructionIndex(*MI);
+ auto Shrink = [=](LiveRange &LR, LaneBitmask LaneMask) {
+ LiveRange::Segment *S = LR.getSegmentContaining(LastCopyIdx);
+ if (!S)
+ return true;
+ if ((LaneMask & RemainingUses).any())
+ return false;
+ if (S->end.getBaseIndex() != UseIdx)
+ return false;
+ S->end = LastCopyIdx;
+ return true;
+ };
- SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
- if (I->end == UseIdx)
- LI.removeSegment(LastCopyIdx, UseIdx);
+ LiveInterval &LI = LIS->getInterval(RegB);
+ bool ShrinkLI = true;
+ for (auto &S : LI.subranges())
+ ShrinkLI &= Shrink(S, S.LaneMask);
+ if (ShrinkLI)
+ Shrink(LI, LaneBitmask::getAll());
}
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
@@ -1580,6 +1687,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
if (!collectTiedOperands(&*mi, TiedOperands)) {
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
@@ -1604,6 +1712,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// The tied operands have been eliminated or shifted further down
// the block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
@@ -1628,18 +1737,44 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
mi->RemoveOperand(1);
mi->setDesc(TII->get(TargetOpcode::COPY));
LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+
+ // Update LiveIntervals.
+ if (LIS) {
+ Register Reg = mi->getOperand(0).getReg();
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ // The COPY no longer defines subregs of %reg except for
+ // %reg.subidx.
+ LaneBitmask LaneMask =
+ TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
+ SlotIndex Idx = LIS->getInstructionIndex(*mi);
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none()) {
+ LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
+ LiveRange::iterator DefSeg = std::next(UseSeg);
+ S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ }
+ }
+
+ // The COPY no longer has a use of %reg.
+ LIS->shrinkToUses(&LI);
+ } else {
+ // The live interval for Reg did not have subranges but now it needs
+ // them because we have introduced a subreg def. Recompute it.
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
}
// Clear TiedOperands here instead of at the top of the loop
// since most instructions do not have tied operands.
TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
}
}
- if (LIS)
- MF->verify(this, "After two-address instruction pass");
-
return MadeChange;
}
@@ -1722,6 +1857,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
MI.RemoveOperand(j);
} else {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+
LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
MI.eraseFromParent();
}
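removeMapRegEntry above prunes SrcRegMap/DstRegMap entries whose mapped physical register is clobbered, using a collect-then-erase walk so the map is never modified while it is being iterated. A generic sketch of that idiom with standard containers; register overlap is reduced to plain equality here, so this is only illustrative, not the LLVM implementation:

#include <unordered_map>
#include <vector>

void removeClobberedEntries(std::unordered_map<int, int> &RegMap,
                            int ClobberedPhysReg) {
  // First gather the keys whose mapped value is clobbered...
  std::vector<int> Srcs;
  for (const auto &Entry : RegMap)
    if (Entry.second == ClobberedPhysReg)  // stands in for regsOverlap()
      Srcs.push_back(Entry.first);
  // ...then erase them, so iteration above never races with mutation.
  for (int Src : Srcs)
    RegMap.erase(Src);
}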
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 2ce6ea1d4212..d042deefd746 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -108,7 +108,7 @@ class IRPromoter {
SetVector<Value*> &Visited;
SetVector<Value*> &Sources;
SetVector<Instruction*> &Sinks;
- SmallVectorImpl<Instruction*> &SafeWrap;
+ SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
SmallPtrSet<Value*, 8> NewInsts;
SmallPtrSet<Instruction*, 4> InstsToRemove;
@@ -116,7 +116,6 @@ class IRPromoter {
SmallPtrSet<Value*, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
- void PrepareWrappingAdds(void);
void ExtendSources(void);
void ConvertTruncs(void);
void PromoteTree(void);
@@ -125,11 +124,11 @@ class IRPromoter {
public:
IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
- SetVector<Value*> &visited, SetVector<Value*> &sources,
- SetVector<Instruction*> &sinks,
- SmallVectorImpl<Instruction*> &wrap) :
- Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
- Sources(sources), Sinks(sinks), SafeWrap(wrap) {
+ SetVector<Value *> &visited, SetVector<Value *> &sources,
+ SetVector<Instruction *> &sinks,
+ SmallPtrSetImpl<Instruction *> &wrap)
+ : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
@@ -145,7 +144,7 @@ class TypePromotion : public FunctionPass {
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value*, 16> AllVisited;
SmallPtrSet<Instruction*, 8> SafeToPromote;
- SmallVector<Instruction*, 4> SafeWrap;
+ SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have the same size result type as TypeSize.
bool EqualTypeSize(Value *V);
@@ -183,6 +182,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
}
StringRef getPassName() const override { return PASS_NAME; }
@@ -192,11 +192,8 @@ public:
}
-static bool GenerateSignBits(Value *V) {
- if (!isa<Instruction>(V))
- return false;
-
- unsigned Opc = cast<Instruction>(V)->getOpcode();
+static bool GenerateSignBits(Instruction *I) {
+ unsigned Opc = I->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
@@ -283,7 +280,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
// just underflows the range, the icmp would give the same result whether the
// result has been truncated or not. We calculate this by:
- // - Zero extending both constants, if needed, to 32-bits.
+ // - Zero extending both constants, if needed, to RegisterBitWidth.
// - Take the absolute value of I's constant, adding this to the icmp const.
// - Check that this value is not out of range for small type. If it is, it
// means that it has underflowed enough to wrap around the icmp constant.
@@ -335,53 +332,46 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
if (Opc != Instruction::Add && Opc != Instruction::Sub)
return false;
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
+ if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
!isa<ConstantInt>(I->getOperand(1)))
return false;
- ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
- bool NegImm = OverflowConst->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
-
// Don't support an icmp that deals with sign bits.
auto *CI = cast<ICmpInst>(*I->user_begin());
if (CI->isSigned() || CI->isEquality())
return false;
- ConstantInt *ICmpConst = nullptr;
+ ConstantInt *ICmpConstant = nullptr;
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
- ICmpConst = Const;
+ ICmpConstant = Const;
else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
- ICmpConst = Const;
+ ICmpConstant = Const;
else
return false;
- // Now check that the result can't wrap on itself.
- APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
- ICmpConst->getValue().zext(32) : ICmpConst->getValue();
-
- Total += OverflowConst->getValue().getBitWidth() < 32 ?
- OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
-
- APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize);
-
- if (Total.getBitWidth() > Max.getBitWidth()) {
- if (Total.ugt(Max.zext(Total.getBitWidth())))
- return false;
- } else if (Max.getBitWidth() > Total.getBitWidth()) {
- if (Total.zext(Max.getBitWidth()).ugt(Max))
- return false;
- } else if (Total.ugt(Max))
+ const APInt &ICmpConst = ICmpConstant->getValue();
+ APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if (Opc == Instruction::Sub)
+ OverflowConst = -OverflowConst;
+ if (!OverflowConst.isNonPositive())
return false;
- LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
- << *I << "\n");
- SafeWrap.push_back(I);
- return true;
+  // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
+ // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
+ // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
+ if (OverflowConst.sgt(ICmpConst)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << "\n");
+ SafeWrap.insert(I);
+ return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << " and " << *CI << "\n");
+ SafeWrap.insert(I);
+ SafeWrap.insert(CI);
+ return true;
+ }
+ return false;
}
bool TypePromotion::shouldPromote(Value *V) {
@@ -403,17 +393,14 @@ bool TypePromotion::shouldPromote(Value *V) {
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
-static bool isPromotedResultSafe(Value *V) {
- if (GenerateSignBits(V))
+static bool isPromotedResultSafe(Instruction *I) {
+ if (GenerateSignBits(I))
return false;
- if (!isa<Instruction>(V))
+ if (!isa<OverflowingBinaryOperator>(I))
return true;
- if (!isa<OverflowingBinaryOperator>(V))
- return true;
-
- return cast<Instruction>(V)->hasNoUnsignedWrap();
+ return I->hasNoUnsignedWrap();
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
@@ -422,7 +409,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
bool ReplacedAll = true;
LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
- << "\n");
+ << "\n");
for (Use &U : From->uses()) {
auto *User = cast<Instruction>(U.getUser());
@@ -441,39 +428,6 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
InstsToRemove.insert(I);
}
-void IRPromoter::PrepareWrappingAdds() {
- LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n");
- IRBuilder<> Builder{Ctx};
-
- // For adds that safely wrap and use a negative immediate as operand 1, we
- // create an equivalent instruction using a positive immediate.
- // That positive immediate can then be zext along with all the other
- // immediates later.
- for (auto *I : SafeWrap) {
- if (I->getOpcode() != Instruction::Add)
- continue;
-
- LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
- assert((isa<ConstantInt>(I->getOperand(1)) &&
- cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
- "Wrapping should have a negative immediate as the second operand");
-
- auto Const = cast<ConstantInt>(I->getOperand(1));
- auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
- Builder.SetInsertPoint(I);
- Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
- if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
- NewInst->copyIRFlags(I);
- NewInsts.insert(NewInst);
- }
- InstsToRemove.insert(I);
- I->replaceAllUsesWith(NewVal);
- LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
- }
- for (auto *I : NewInsts)
- Visited.insert(I);
-}
-
void IRPromoter::ExtendSources() {
IRBuilder<> Builder{Ctx};
@@ -515,8 +469,6 @@ void IRPromoter::ExtendSources() {
void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
- IRBuilder<> Builder{Ctx};
-
// Mutate the types of the instructions within the tree. Here we handle
// constant operands.
for (auto *V : Visited) {
@@ -533,14 +485,16 @@ void IRPromoter::PromoteTree() {
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
- Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ Constant *NewConst = SafeWrap.contains(I)
+ ? ConstantExpr::getSExt(Const, ExtTy)
+ : ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
I->setOperand(i, UndefValue::get(ExtTy));
}
- // Mutate the result type, unless this is an icmp.
- if (!isa<ICmpInst>(I)) {
+ // Mutate the result type, unless this is an icmp or switch.
+ if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
I->mutateType(ExtTy);
Promoted.insert(I);
}
@@ -575,7 +529,7 @@ void IRPromoter::TruncateSinks() {
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
- for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ for (unsigned i = 0; i < Call->arg_size(); ++i) {
Value *Arg = Call->getArgOperand(i);
Type *Ty = TruncTysMap[Call][i];
if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
@@ -678,10 +632,8 @@ void IRPromoter::Mutate() {
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
if (auto *Call = dyn_cast<CallInst>(I)) {
- for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
- Value *Arg = Call->getArgOperand(i);
+ for (Value *Arg : Call->args())
TruncTysMap[Call].push_back(Arg->getType());
- }
} else if (auto *Switch = dyn_cast<SwitchInst>(I))
TruncTysMap[I].push_back(Switch->getCondition()->getType());
else {
@@ -696,10 +648,6 @@ void IRPromoter::Mutate() {
TruncTysMap[Trunc].push_back(Trunc->getDestTy());
}
- // Convert adds using negative immediates to equivalent instructions that use
- // positive constants.
- PrepareWrappingAdds();
-
// Insert zext instructions between sources and their users.
ExtendSources();
@@ -798,7 +746,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
if (SafeToPromote.count(I))
return true;
- if (isPromotedResultSafe(V) || isSafeWrap(I)) {
+ if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
@@ -815,7 +763,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
- << TypeSize << " bits to " << PromotedWidth << "\n");
+ << TypeSize << " bits to " << PromotedWidth << "\n");
SetVector<Value*> WorkList;
SetVector<Value*> Sources;
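The isSafeWrap rewrite above rests on the equivalence quoted in its comment: for a negative add constant C1, the narrow unsigned compare (x + C1) <u C2 is preserved by promotion if C1 is sign-extended and C2 is zero- or sign-extended depending on whether C1 >s C2. A brute-force standalone check of that identity for the i8 -> i32 case, following the same case split as the comment (a sanity-check sketch, not LLVM code):

#include <cstdint>
#include <cstdio>

int main() {
  long Mismatches = 0;
  for (int C1 = -128; C1 < 0; ++C1)
    for (int C2 = -128; C2 < 128; ++C2)
      for (int X = 0; X < 256; ++X) {
        // Original i8 semantics: wrapping add, unsigned compare.
        uint8_t Narrow = static_cast<uint8_t>(X + C1);
        bool NarrowCmp = Narrow < static_cast<uint8_t>(C2);

        // Promoted i32 semantics: zext(x) + sext(C1), then compare against
        // zext(C2) when C1 >s C2, otherwise sext(C2).
        uint32_t Wide = static_cast<uint32_t>(X) + static_cast<uint32_t>(C1);
        uint32_t RHS = (C1 > C2)
                           ? static_cast<uint32_t>(static_cast<uint8_t>(C2))
                           : static_cast<uint32_t>(C2);
        bool WideCmp = Wide < RHS;

        if (NarrowCmp != WideCmp)
          ++Mismatches;
      }
  std::printf("mismatches: %ld\n", Mismatches); // expected: 0
  return 0;
}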
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 0f164e2637a2..069aca742da0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -541,15 +541,8 @@ void VirtRegRewriter::rewrite() {
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
- for (MachineBasicBlock::instr_iterator
- MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
- MachineInstr *MI = &*MII;
- ++MII;
-
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+ for (MachineOperand &MO : MI.operands()) {
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -574,7 +567,7 @@ void VirtRegRewriter::rewrite() {
// have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
- (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
+ (MO.isDef() && subRegLiveThrough(MI, PhysReg)))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
@@ -619,20 +612,20 @@ void VirtRegRewriter::rewrite() {
// Add any missing super-register kills after rewriting the whole
// instruction.
while (!SuperKills.empty())
- MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+ MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
while (!SuperDeads.empty())
- MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+ MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
while (!SuperDefs.empty())
- MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+ MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI);
- LLVM_DEBUG(dbgs() << "> " << *MI);
+ LLVM_DEBUG(dbgs() << "> " << MI);
- expandCopyBundle(*MI);
+ expandCopyBundle(MI);
// We can remove identity copies right now.
- handleIdentityCopy(*MI);
+ handleIdentityCopy(MI);
}
}
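The VirtRegRewriter loop above now uses llvm::make_early_inc_range, which packages the early-increment idiom the old explicit iterator loop spelled out by hand: remember the current element, advance first, then run a body that may erase that element. A standalone sketch of the idiom, with std::list standing in for the machine instruction list:

#include <cstdio>
#include <list>

int main() {
  std::list<int> Instrs = {1, 2, 3, 4, 5};
  for (auto It = Instrs.begin(), End = Instrs.end(); It != End;) {
    auto Cur = It++;         // early increment: Cur can now be erased safely
    if (*Cur % 2 == 0)
      Instrs.erase(Cur);     // e.g. an identity copy being deleted
  }
  for (int I : Instrs)
    std::printf("%d ", I);   // prints: 1 3 5
  std::printf("\n");
  return 0;
}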
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c4c84cd921fa..c04a7b28eff9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -29,7 +29,7 @@
// __wasm_lpad_context.lpad_index = index;
// __wasm_lpad_context.lsda = wasm.lsda();
// _Unwind_CallPersonality(exn);
-// selector = __wasm.landingpad_context.selector;
+// selector = __wasm_lpad_context.selector;
// ...
//
//
@@ -329,7 +329,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
- // Pseudocode: int selector = __wasm.landingpad_context.selector;
+ // Pseudocode: int selector = __wasm_lpad_context.selector;
Instruction *Selector =
IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");