author     Dimitry Andric <dim@FreeBSD.org>   2022-07-27 19:50:45 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2022-07-27 19:50:54 +0000
commit     08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014
tree       041e72e32710b1e742516d8c9f1575bf0116d3e3 /llvm/lib
parent     4b4fe385e49bd883fd183b5f21c1ea486c722e61
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/CodeMetrics.cpp | 3
-rw-r--r-- llvm/lib/Analysis/InlineCost.cpp | 4
-rw-r--r-- llvm/lib/Analysis/InstructionSimplify.cpp | 12
-rw-r--r-- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 4
-rw-r--r-- llvm/lib/Analysis/MemoryBuiltins.cpp | 7
-rw-r--r-- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 3
-rw-r--r-- llvm/lib/Analysis/PHITransAddr.cpp | 17
-rw-r--r-- llvm/lib/Analysis/ScalarEvolution.cpp | 26
-rw-r--r-- llvm/lib/Analysis/TypeMetadataUtils.cpp | 4
-rw-r--r-- llvm/lib/Analysis/ValueTracking.cpp | 43
-rw-r--r-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/WasmException.h | 2
-rw-r--r-- llvm/lib/CodeGen/AtomicExpandPass.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 10
-rw-r--r-- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/LiveRangeEdit.cpp | 16
-rw-r--r-- llvm/lib/CodeGen/MachineFunctionPass.cpp | 29
-rw-r--r-- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/RegAllocGreedy.cpp | 19
-rw-r--r-- llvm/lib/CodeGen/RegAllocGreedy.h | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 90
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 41
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 17
-rw-r--r-- llvm/lib/DWARFLinker/DWARFLinker.cpp | 53
-rw-r--r-- llvm/lib/DWP/DWP.cpp | 7
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp | 124
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h | 10
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp | 179
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h | 61
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/x86_64.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp | 38
-rw-r--r-- llvm/lib/FileCheck/FileCheck.cpp | 2
-rw-r--r-- llvm/lib/IR/Instructions.cpp | 4
-rw-r--r-- llvm/lib/IR/IntrinsicInst.cpp | 33
-rw-r--r-- llvm/lib/IR/ModuleSummaryIndex.cpp | 8
-rw-r--r-- llvm/lib/IR/PrintPasses.cpp | 44
-rw-r--r-- llvm/lib/LTO/LTO.cpp | 4
-rw-r--r-- llvm/lib/LTO/LTOBackend.cpp | 3
-rw-r--r-- llvm/lib/LTO/LTOCodeGenerator.cpp | 12
-rw-r--r-- llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 6
-rw-r--r-- llvm/lib/MC/ELFObjectWriter.cpp | 5
-rw-r--r-- llvm/lib/MC/MCDisassembler/MCDisassembler.cpp | 5
-rw-r--r-- llvm/lib/MC/XCOFFObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/ObjCopy/ELF/ELFObject.cpp | 52
-rw-r--r-- llvm/lib/ObjCopy/ELF/ELFObject.h | 14
-rw-r--r-- llvm/lib/ObjectYAML/ELFYAML.cpp | 8
-rw-r--r-- llvm/lib/Passes/PassBuilderPipelines.cpp | 3
-rw-r--r-- llvm/lib/Passes/StandardInstrumentations.cpp | 58
-rw-r--r-- llvm/lib/Support/ARMAttributeParser.cpp | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 144
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 32
-rw-r--r-- llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 8
-rw-r--r-- llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h | 3
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp | 4
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 4
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 4
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp | 4
-rw-r--r-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 12
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 212
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 818
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 233
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 16
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h | 8
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 56
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 2
-rw-r--r-- llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 30
-rw-r--r-- llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 324
-rw-r--r-- llvm/lib/Target/DirectX/DXILOpBuilder.h | 46
-rw-r--r-- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 167
-rw-r--r-- llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp | 4
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h | 1
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 3
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 46
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 4
-rw-r--r-- llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArch.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFrameLowering.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchSubtarget.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp | 4
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h | 4
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h | 4
-rw-r--r-- llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h | 2
-rw-r--r-- llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp | 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 15
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 53
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 21
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 56
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetMachine.h | 3
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 34
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 30
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 84
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h | 2
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.h | 2
-rw-r--r-- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 55
-rw-r--r-- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 6
-rw-r--r-- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 28
-rw-r--r-- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 61
-rw-r--r-- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 3
-rw-r--r-- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 61
-rw-r--r-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 12
-rw-r--r-- llvm/lib/Transforms/IPO/SCCP.cpp | 2
-rw-r--r-- llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 8
-rw-r--r-- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 35
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 7
-rw-r--r-- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 8
-rw-r--r-- llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 25
-rw-r--r-- llvm/lib/Transforms/Scalar/Reassociate.cpp | 24
-rw-r--r-- llvm/lib/Transforms/Utils/InlineFunction.cpp | 59
-rw-r--r-- llvm/lib/Transforms/Utils/MatrixUtils.cpp | 42
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 137
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4
136 files changed, 2807 insertions, 1468 deletions
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 6d9084215dee..ded842b92ae1 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -133,7 +133,8 @@ void CodeMetrics::analyzeBasicBlock(
// When preparing for LTO, liberally consider calls as inline
// candidates.
if (!Call->isNoInline() && IsLoweredToCall &&
- ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) {
+ ((F->hasInternalLinkage() && F->hasOneLiveUse()) ||
+ PrepareForLTO)) {
++NumInlineCandidates;
}
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 9f8a5e472f01..8192ed56caf0 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -185,8 +185,8 @@ private:
public:
InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
- virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS) override;
+ void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override;
};
/// Carry out call site analysis, in order to evaluate inlinability.
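
Several files in this commit make the same cleanup: the virtual keyword is dropped from overriding member functions, since override already implies the function is virtual and LLVM style prefers the shorter form. A minimal illustration in plain C++ (names are hypothetical):

    struct Annotator {
      virtual void emit() {}            // base declares the virtual interface
      virtual ~Annotator() = default;
    };
    struct CostAnnotator : Annotator {
      void emit() override {}           // 'override' alone; 'virtual' is redundant
    };
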
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4691aebbdfe1..21fe448218bc 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1591,12 +1591,6 @@ static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
!match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
return nullptr;
- // We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
- // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
- // can eliminate Op1 from this 'and'.
- if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
- return Op0;
-
// Check for any combination of predicates that are guaranteed to be disjoint.
if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
(Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
@@ -1616,12 +1610,6 @@ static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
!match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
return nullptr;
- // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
- // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
- // can eliminate Op0 from this 'or'.
- if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
- return Op1;
-
// Check for any combination of predicates that cover the entire range of
// possibilities.
if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bed684b7652a..aa35f253bc5f 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1500,9 +1500,7 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
Value *Ptr0 = VL[0];
using DistOrdPair = std::pair<int64_t, int>;
- auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
- return L.first < R.first;
- };
+ auto Compare = llvm::less_first();
std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
Offsets.emplace(0, 0);
int Cnt = 1;
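
llvm::less_first (llvm/ADT/STLExtras.h) is a reusable function object that orders pairs by their first member only; it replaces the hand-written lambda here and again in InstrRefBasedImpl.cpp below. A standalone sketch of the equivalent behavior in plain C++:

    #include <set>
    #include <utility>

    // Stand-in for llvm::less_first: compare pairs by .first, ignore .second.
    struct LessFirst {
      template <typename T> bool operator()(const T &L, const T &R) const {
        return L.first < R.first;
      }
    };

    int main() {
      using DistOrdPair = std::pair<long, int>; // (distance, original index)
      std::set<DistOrdPair, LessFirst> Offsets;
      Offsets.emplace(0, 0);
      Offsets.emplace(-8, 1);
      return Offsets.begin()->second; // 1: the smallest distance sorts first
    }
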
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 31e4380e4379..413ec6dd4b42 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -115,9 +115,7 @@ struct AllocFnsTy {
// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
// know which functions are nounwind, noalias, nocapture parameters, etc.
static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
- {LibFunc_malloc, {MallocLike, 1, 0, -1, -1, MallocFamily::Malloc}},
{LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1, MallocFamily::VecMalloc}},
- {LibFunc_valloc, {MallocLike, 1, 0, -1, -1, MallocFamily::Malloc}},
{LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int)
{LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int, nothrow)
{LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t)
@@ -142,13 +140,9 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int, nothrow)
{LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long)
{LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long, nothrow)
- {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0, MallocFamily::Malloc}},
{LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0, MallocFamily::Malloc}},
- {LibFunc_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::Malloc}},
{LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::VecMalloc}},
- {LibFunc_realloc, {ReallocLike, 2, 1, -1, -1, MallocFamily::Malloc}},
{LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1, MallocFamily::VecMalloc}},
- {LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1, MallocFamily::Malloc}},
{LibFunc_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}},
{LibFunc_dunder_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}},
{LibFunc_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}},
@@ -488,7 +482,6 @@ struct FreeFnsTy {
// clang-format off
static const std::pair<LibFunc, FreeFnsTy> FreeFnData[] = {
- {LibFunc_free, {1, MallocFamily::Malloc}},
{LibFunc_vec_free, {1, MallocFamily::VecMalloc}},
{LibFunc_ZdlPv, {1, MallocFamily::CPPNew}}, // operator delete(void*)
{LibFunc_ZdaPv, {1, MallocFamily::CPPNewArray}}, // operator delete[](void*)
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index c52b27a38fe9..efe60586979a 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -164,7 +164,8 @@ static void addIntrinsicToSummary(
SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
DominatorTree &DT) {
switch (CI->getCalledFunction()->getIntrinsicID()) {
- case Intrinsic::type_test: {
+ case Intrinsic::type_test:
+ case Intrinsic::public_type_test: {
auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
if (!TypeId)
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index 7571bd0059cc..5b0fbca23891 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -21,6 +21,10 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> EnableAddPhiTranslation(
+ "gvn-add-phi-translation", cl::init(false), cl::Hidden,
+ cl::desc("Enable phi-translation of add instructions"));
+
static bool CanPHITrans(Instruction *Inst) {
if (isa<PHINode>(Inst) ||
isa<GetElementPtrInst>(Inst))
@@ -410,14 +414,14 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
return Result;
}
-#if 0
- // FIXME: This code works, but it is unclear that we actually want to insert
- // a big chain of computation in order to make a value available in a block.
- // This needs to be evaluated carefully to consider its cost trade offs.
-
// Handle add with a constant RHS.
- if (Inst->getOpcode() == Instruction::Add &&
+ if (EnableAddPhiTranslation && Inst->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Inst->getOperand(1))) {
+
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
// PHI translate the LHS.
Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
CurBB, PredBB, DT, NewInsts);
@@ -431,7 +435,6 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
NewInsts.push_back(Res);
return Res;
}
-#endif
return nullptr;
}
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d46248aa3889..2958a5054afc 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -11153,20 +11153,6 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
return true;
}
- // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
- auto ProveViaGuard = [&](const BasicBlock *Block) {
- if (isImpliedViaGuard(Block, Pred, LHS, RHS))
- return true;
- if (ProvingStrictComparison) {
- auto ProofFn = [&](ICmpInst::Predicate P) {
- return isImpliedViaGuard(Block, P, LHS, RHS);
- };
- if (SplitAndProve(ProofFn))
- return true;
- }
- return false;
- };
-
// Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
const Instruction *CtxI = &BB->front();
@@ -11193,9 +11179,6 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
PredBB = BB->getSinglePredecessor();
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
- if (ProveViaGuard(Pair.first))
- return true;
-
const BranchInst *BlockEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!BlockEntryPredicate || BlockEntryPredicate->isUnconditional())
@@ -11218,6 +11201,15 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
return true;
}
+ // Check conditions due to any @llvm.experimental.guard intrinsics.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (GuardDecl)
+ for (const auto *GU : GuardDecl->users())
+ if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
+ if (Guard->getFunction() == BB->getParent() && DT.dominates(Guard, BB))
+ if (ProveViaCond(Guard->getArgOperand(0), false))
+ return true;
return false;
}
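
The deleted ProveViaGuard lambda is replaced by a single scan over the users of the @llvm.experimental.guard declaration, run once after the predecessor walk. The enumeration idiom, as a sketch against the LLVM API (forEachGuard is an illustrative name):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Visit every call to @llvm.experimental.guard in M by walking the users
    // of its declaration, mirroring the loop added above.
    static void forEachGuard(Module &M, function_ref<void(IntrinsicInst &)> Fn) {
      Function *GuardDecl =
          M.getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
      if (!GuardDecl)
        return;
      for (User *U : GuardDecl->users())
        if (auto *Guard = dyn_cast<IntrinsicInst>(U))
          Fn(*Guard);
    }
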
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 201e64770766..e128187bac49 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -75,7 +75,9 @@ void llvm::findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
DominatorTree &DT) {
- assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
+ assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test ||
+ CI->getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::public_type_test);
const Module *M = CI->getParent()->getParent()->getParent();
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1f3798d1338e..2dd671b4ab9e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4266,9 +4266,10 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
return true;
}
-/// This function computes the length of a null-terminated C string pointed to
-/// by V. If successful, it returns true and returns the string in Str.
-/// If unsuccessful, it returns false.
+/// Extract bytes from the initializer of the constant array V, which need
+/// not be a nul-terminated string. On success, store the bytes in Str and
+/// return true. When TrimAtNul is set, Str will contain only the bytes up
+/// to but not including the first nul. Return false on failure.
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
uint64_t Offset, bool TrimAtNul) {
ConstantDataArraySlice Slice;
@@ -6543,7 +6544,6 @@ bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
const Value *RHS, const DataLayout &DL,
unsigned Depth) {
- assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
return true;
@@ -6656,14 +6656,12 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
/// Return true if "icmp APred X, C1" implies "icmp BPred X, C2" is true.
/// Return false if "icmp APred X, C1" implies "icmp BPred X, C2" is false.
/// Otherwise, return None if we can't infer anything.
-static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
- const ConstantInt *C1,
- CmpInst::Predicate BPred,
- const ConstantInt *C2) {
- ConstantRange DomCR =
- ConstantRange::makeExactICmpRegion(APred, C1->getValue());
- ConstantRange CR = ConstantRange::makeExactICmpRegion(BPred, C2->getValue());
+static Optional<bool> isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
+ const APInt &C1,
+ CmpInst::Predicate BPred,
+ const APInt &C2) {
+ ConstantRange DomCR = ConstantRange::makeExactICmpRegion(APred, C1);
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(BPred, C2);
ConstantRange Intersection = DomCR.intersectWith(CR);
ConstantRange Difference = DomCR.difference(CR);
if (Intersection.isEmptySet())
@@ -6701,14 +6699,9 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
// Can we infer anything when the LHS operands match and the RHS operands are
// constants (not necessarily matching)?
- if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
- if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
- APred, cast<ConstantInt>(ARHS), BPred, cast<ConstantInt>(BRHS)))
- return Implication;
- // No amount of additional analysis will infer the second condition, so
- // early exit.
- return None;
- }
+ const APInt *AC, *BC;
+ if (ALHS == BLHS && match(ARHS, m_APInt(AC)) && match(BRHS, m_APInt(BC)))
+ return isImpliedCondMatchingImmOperands(APred, *AC, BPred, *BC);
if (APred == BPred)
return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth);
@@ -6761,14 +6754,8 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred,
if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
return None;
- Type *OpTy = LHS->getType();
- assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!");
-
- // FIXME: Extending the code below to handle vectors.
- if (OpTy->isVectorTy())
- return None;
-
- assert(OpTy->isIntegerTy(1) && "implied by above");
+ assert(LHS->getType()->isIntOrIntVectorTy(1) &&
+ "Expected integer type only!");
// Both LHS and RHS are icmps.
const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
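
The rewritten helper takes APInt instead of ConstantInt, so the m_APInt matcher lets it handle vector splat constants as well as scalars. Its logic rests on ConstantRange::makeExactICmpRegion; a sketch of the same subset reasoning (impliesTrue is an illustrative name, assumes LLVM headers):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // "icmp A x, C1" implies "icmp B x, C2" is true exactly when every x
    // satisfying the first also satisfies the second, i.e. the set
    // difference of the two exact regions is empty.
    static bool impliesTrue(CmpInst::Predicate A, const APInt &C1,
                            CmpInst::Predicate B, const APInt &C2) {
      ConstantRange Dom = ConstantRange::makeExactICmpRegion(A, C1);
      ConstantRange CR = ConstantRange::makeExactICmpRegion(B, C2);
      return Dom.difference(CR).isEmptySet();
    }
    // e.g. (slt, 4) vs (slt, 10): [INT_MIN, 4) is a subset of [INT_MIN, 10).
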
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1d6c21bd66d1..1943b5db94c3 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7788,7 +7788,7 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
case bitc::FS_FLAGS: { // [flags]
uint64_t Flags = Record[0];
// Scan flags.
- assert(Flags <= 0x7f && "Unexpected bits in flag");
+ assert(Flags <= 0xff && "Unexpected bits in flag");
return Flags & 0x8;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e0050a47a6f6..32a10ad41d1f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2795,12 +2795,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
DL.getTypeAllocSize(Op->getType()).getFixedSize())
return OpExpr;
- // Otherwise the pointer is smaller than the resultant integer, mask off
- // the high bits so we are sure to get a proper truncation if the input is
- // a constant expr.
- unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
- return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
+ break; // Error
}
case Instruction::Sub: {
diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/llvm/lib/CodeGen/AsmPrinter/WasmException.h
index 2abbe37cb6d9..419b569d123c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -28,7 +28,7 @@ public:
void endModule() override;
void beginFunction(const MachineFunction *MF) override {}
- virtual void markFunctionEnd() override;
+ void markFunctionEnd() override;
void endFunction(const MachineFunction *MF) override;
protected:
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index f21c1bf4e914..ad51bab8f30b 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -515,9 +515,14 @@ void AtomicExpand::expandAtomicStore(StoreInst *SI) {
// It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
+ AtomicOrdering Ordering = SI->getOrdering();
+ assert(Ordering != AtomicOrdering::NotAtomic);
+ AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
+ ? AtomicOrdering::Monotonic
+ : Ordering;
AtomicRMWInst *AI = Builder.CreateAtomicRMW(
AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
- SI->getAlign(), SI->getOrdering());
+ SI->getAlign(), RMWOrdering);
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
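
Background for the ordering tweak: atomicrmw has no 'unordered' ordering (that level exists only on atomic load/store), so an unordered store being expanded into an xchg must be rounded up to monotonic. A minimal sketch of the mapping, with an enum mirroring LLVM's AtomicOrdering:

    enum class AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire,
                                Release, AcquireRelease, SequentiallyConsistent };

    // Round 'unordered' up to 'monotonic'; stronger orderings pass through.
    AtomicOrdering orderingForExpandedStore(AtomicOrdering O) {
      // Caller guarantees O != NotAtomic, matching the assert in the pass.
      return O == AtomicOrdering::Unordered ? AtomicOrdering::Monotonic : O;
    }
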
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b6c762b93ca5..b8f6fc9bbcde 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2568,8 +2568,6 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
}
};
-} // end anonymous namespace
-
#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
@@ -2617,6 +2615,8 @@ LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
}
#endif
+} // end anonymous namespace
+
namespace {
/// This class provides transaction based operation on the IR.
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index da054b9c14fb..05a25bc3078e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1142,7 +1142,8 @@ bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
if (MI.getParent() == UseMI.getParent() &&
((IsDiv && UseMI.getOpcode() == RemOpcode) ||
(!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
- matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
+ matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
+ matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
OtherMI = &UseMI;
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index dbdcfe0b6f0b..2f9187bbf2ad 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -151,11 +151,11 @@ public:
LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
<< " was copied to " << MI);
#endif
- // We allow insts in the entry block to have a debug loc line of 0 because
+ // We allow insts in the entry block to have no debug loc because
// they could have originated from constants, and we don't want a jumpy
// debug experience.
assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
- MI.getDebugLoc().getLine() == 0) &&
+ (MI.getParent()->isEntryBlock() && !MI.getDebugLoc())) &&
"Line info was not transferred to all instructions");
}
};
@@ -3020,11 +3020,9 @@ bool IRTranslator::translate(const Instruction &Inst) {
bool IRTranslator::translate(const Constant &C, Register Reg) {
// We only emit constants into the entry block from here. To prevent jumpy
- // debug behaviour set the line to 0.
+  // debug behaviour, drop the debug location.
if (auto CurrInstDL = CurBuilder->getDL())
- EntryBuilder->setDebugLoc(DILocation::get(C.getContext(), 0, 0,
- CurrInstDL.getScope(),
- CurrInstDL.getInlinedAt()));
+ EntryBuilder->setDebugLoc(DebugLoc());
if (auto CI = dyn_cast<ConstantInt>(&C))
EntryBuilder->buildConstant(Reg, *CI);
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index ef49d3888f2b..191596dbf53e 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -1330,7 +1330,7 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(0);
unsigned InstrNum = MI.getOperand(1).getImm();
- auto EmitBadPHI = [this, &MI, InstrNum](void) -> bool {
+ auto EmitBadPHI = [this, &MI, InstrNum]() -> bool {
// Helper lambda to do any accounting when we fail to find a location for
// a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a
// dead stack slot, for example.
@@ -3136,8 +3136,7 @@ bool InstrRefBasedLDV::emitTransfers(
MI->getDebugLoc()->getInlinedAt());
Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
}
- llvm::sort(Insts,
- [](const auto &A, const auto &B) { return A.first < B.first; });
+ llvm::sort(Insts, llvm::less_first());
// Insert either before or after the designated point...
if (P.MBB) {
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 2aafb746aa2c..abf36b3f4c67 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -300,13 +300,15 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
SmallVector<unsigned, 8> RegsToErase;
bool ReadsPhysRegs = false;
bool isOrigDef = false;
- unsigned Dest;
+ Register Dest;
+ unsigned DestSubReg;
// Only optimize rematerialize case when the instruction has one def, since
// otherwise we could leave some dead defs in the code. This case is
// extremely rare.
if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
MI->getDesc().getNumDefs() == 1) {
Dest = MI->getOperand(0).getReg();
+ DestSubReg = MI->getOperand(0).getSubReg();
unsigned Original = VRM->getOriginal(Dest);
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
@@ -384,8 +386,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
TII.isTriviallyReMaterializable(*MI)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
- VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+ VNInfo *VNI = NewLI.getNextValue(Idx, Alloc);
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
+
+ if (DestSubReg) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ auto *SR = NewLI.createSubRange(
+ Alloc, TRI->getSubRegIndexLaneMask(DestSubReg));
+ SR->addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(),
+ SR->getNextValue(Idx, Alloc)));
+ }
+
pop_back();
DeadRemats->insert(MI);
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 99494122d608..477310f59112 100644
--- a/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PrintPasses.h"
using namespace llvm;
using namespace ore;
@@ -70,6 +71,17 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
if (ShouldEmitSizeRemarks)
CountBefore = MF.getInstructionCount();
+ // For --print-changed, if the function name is a candidate, save the
+ // serialized MF to be compared later.
+ // TODO Implement --filter-passes.
+ SmallString<0> BeforeStr, AfterStr;
+ bool ShouldPrintChanged = PrintChanged != ChangePrinter::None &&
+ isFunctionInPrintList(MF.getName());
+ if (ShouldPrintChanged) {
+ raw_svector_ostream OS(BeforeStr);
+ MF.print(OS);
+ }
+
bool RV = runOnMachineFunction(MF);
if (ShouldEmitSizeRemarks) {
@@ -97,6 +109,23 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
MFProps.set(SetProperties);
MFProps.reset(ClearedProperties);
+
+ // For --print-changed, print if the serialized MF has changed. Modes other
+ // than quiet/verbose are unimplemented and treated the same as 'quiet'.
+ if (ShouldPrintChanged) {
+ raw_svector_ostream OS(AfterStr);
+ MF.print(OS);
+ if (BeforeStr != AfterStr) {
+ StringRef Arg;
+ if (const PassInfo *PI = Pass::lookupPassInfo(getPassID()))
+ Arg = PI->getPassArgument();
+ errs() << ("*** IR Dump After " + getPassName() + " (" + Arg + ") on " +
+ MF.getName() + " ***\n" + AfterStr);
+ } else if (PrintChanged == ChangePrinter::Verbose) {
+ errs() << ("*** IR Dump After " + getPassName() + " on " + MF.getName() +
+ " omitted because no change ***\n");
+ }
+ }
return RV;
}
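
This extends -print-changed, previously IR-only, to machine function passes: snapshot the serialized MF before the pass, serialize again afterwards, and print only when the text differs. A generic sketch of that shape in plain C++ (names are illustrative):

    #include <functional>
    #include <iostream>
    #include <string>

    void runWithChangeReport(const std::string &PassName,
                             const std::function<std::string()> &Serialize,
                             const std::function<void()> &RunPass,
                             bool Verbose) {
      std::string Before = Serialize();
      RunPass();
      std::string After = Serialize();
      if (Before != After)
        std::cerr << "*** IR Dump After " << PassName << " ***\n" << After;
      else if (Verbose)
        std::cerr << "*** IR Dump After " << PassName
                  << " omitted because no change ***\n";
    }
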
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 1115c2a27956..87e2f9f20021 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
@@ -69,6 +70,8 @@ static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
static bool lowerObjCCall(Function &F, const char *NewFn,
bool setNonLazyBind = false) {
+ assert(IntrinsicInst::mayLowerToFunctionCall(F.getIntrinsicID()) &&
+ "Pre-ISel intrinsics do lower into regular function calls");
if (F.use_empty())
return false;
@@ -107,7 +110,9 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(CI->args());
- CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
+ CI->getOperandBundlesAsDefs(BundleList);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args, BundleList);
NewCI->setName(CI->getName());
// Try to set the most appropriate TailCallKind based on both the current
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 7327f9e52efc..54bb4a31ef49 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -47,7 +47,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- virtual MachineFunctionProperties getRequiredProperties() const override {
+ MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4a54d7ebf8a9..9c6cb7c3a4e2 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -135,6 +135,12 @@ static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
"more important then whether the range is global"),
cl::Hidden);
+static cl::opt<bool> GreedyReverseLocalAssignment(
+ "greedy-reverse-local-assignment",
+ cl::desc("Reverse allocation order of local live ranges, such that "
+ "shorter local live ranges will tend to be allocated first"),
+ cl::Hidden);
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -297,11 +303,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
} else {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
- bool ReverseLocal = TRI->reverseLocalAssignment();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- bool ForceGlobal =
- !ReverseLocal && (Size / SlotIndex::InstrDist) >
- (2 * RegClassInfo.getNumAllocatableRegs(&RC));
+ bool ForceGlobal = !ReverseLocalAssignment &&
+ (Size / SlotIndex::InstrDist) >
+ (2 * RegClassInfo.getNumAllocatableRegs(&RC));
unsigned GlobalBit = 0;
if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
@@ -309,7 +314,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
// Allocate original local ranges in linear instruction order. Since they
// are singly defined, this produces optimal coloring in the absence of
// global interference and other constraints.
- if (!ReverseLocal)
+ if (!ReverseLocalAssignment)
Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
else {
// Allocating bottom up may allow many short LRGs to be assigned first
@@ -2528,6 +2533,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
? GreedyRegClassPriorityTrumpsGlobalness
: TRI->regClassPriorityTrumpsGlobalness(*MF);
+ ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
+ ? GreedyReverseLocalAssignment
+ : TRI->reverseLocalAssignment();
+
ExtraInfo.emplace();
EvictAdvisor =
getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
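
Both greedy heuristics now follow the same override pattern: a flag passed explicitly on the command line wins, otherwise the target hook supplies the default. A sketch of the idiom (flagOrTargetDefault is an illustrative name, assumes LLVM's cl::opt API):

    #include "llvm/Support/CommandLine.h"

    // getNumOccurrences() is zero when the user never passed the flag, in
    // which case the target's default applies.
    template <typename T>
    static T flagOrTargetDefault(const llvm::cl::opt<T> &Flag, T TargetDefault) {
      return Flag.getNumOccurrences() ? Flag.getValue() : TargetDefault;
    }
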
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index 316b12d0213b..483f59ed8e8e 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -270,6 +270,8 @@ private:
/// machine function.
bool RegClassPriorityTrumpsGlobalness;
+ bool ReverseLocalAssignment;
+
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index edb0756e8c3b..654879115ff9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4877,9 +4877,16 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
return Res;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
+
// If the type twice as wide is legal, transform the mulhu to a wider
// multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -4887,8 +4894,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
- SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
- SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
@@ -4908,19 +4915,26 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
return Res;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
+
// (umul_lohi N0, 0) -> (0, 0)
- if (isNullConstant(N->getOperand(1))) {
+ if (isNullConstant(N1)) {
SDValue Zero = DAG.getConstant(0, DL, VT);
return CombineTo(N, Zero, Zero);
}
// (umul_lohi N0, 1) -> (N0, 0)
- if (isOneConstant(N->getOperand(1))) {
+ if (isOneConstant(N1)) {
SDValue Zero = DAG.getConstant(0, DL, VT);
- return CombineTo(N, N->getOperand(0), Zero);
+ return CombineTo(N, N0, Zero);
}
// If the type twice as wide is legal, transform the mulhu to a wider
@@ -4930,8 +4944,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
- SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
- SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
@@ -7247,6 +7261,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
SelectionDAG &DAG, bool IsRotate) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -7278,19 +7293,20 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+ // This allows us to peek through any operations that only affect Mask's
+ // un-demanded bits.
//
- // NOTE: We can only do this when matching an AND and not a general
- // funnel shift.
+ // NOTE: We can only do this when matching operations which won't modify the
+ // least Log2(EltSize) significant bits and not a general funnel shift.
unsigned MaskLoBits = 0;
- if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
- if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
- unsigned Bits = Log2_64(EltSize);
- if (NegC->getAPIntValue().getActiveBits() <= Bits &&
- ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
- Neg = Neg.getOperand(0);
- MaskLoBits = Bits;
- }
+ if (IsRotate && isPowerOf2_64(EltSize)) {
+ unsigned Bits = Log2_64(EltSize);
+ APInt DemandedBits =
+ APInt::getLowBitsSet(Neg.getScalarValueSizeInBits(), Bits);
+ if (SDValue Inner =
+ TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
+ Neg = Inner;
+ MaskLoBits = Bits;
}
}
@@ -7302,15 +7318,15 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
return false;
SDValue NegOp1 = Neg.getOperand(1);
- // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
- // Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
- if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
- KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
- if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
- ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
- MaskLoBits))
- Pos = Pos.getOperand(0);
+ // On the RHS of [A], if Pos is the result of operation on Pos' that won't
+ // affect Mask's demanded bits, just replace Pos with Pos'. These operations
+ // are redundant for the purpose of the equality.
+ if (MaskLoBits) {
+ APInt DemandedBits =
+ APInt::getLowBitsSet(Pos.getScalarValueSizeInBits(), MaskLoBits);
+ if (SDValue Inner =
+ TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
+ Pos = Inner;
}
}
@@ -14988,7 +15004,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
- bool UnsafeFPMath =
+ bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
// Constant fold FMA.
@@ -15012,7 +15028,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
- if (UnsafeFPMath) {
+ // FIXME: use fast math flags instead of Options.UnsafeFPMath
+ if (Options.UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
@@ -15029,7 +15046,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- if (UnsafeFPMath) {
+ if (CanReassociate) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
@@ -15070,7 +15087,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
- if (UnsafeFPMath) {
+ if (CanReassociate) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
return DAG.getNode(
@@ -19697,8 +19714,11 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
// extract.
SDValue Op0 = Vec.getOperand(0);
SDValue Op1 = Vec.getOperand(1);
+ APInt SplatVal;
if (isAnyConstantBuildVector(Op0, true) ||
- isAnyConstantBuildVector(Op1, true)) {
+ ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
+ isAnyConstantBuildVector(Op1, true) ||
+ ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
// extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
// extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
SDLoc DL(ExtElt);
@@ -19775,6 +19795,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// converts.
}
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
+
if (VecVT.isScalableVector())
return SDValue();
@@ -19820,9 +19843,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
- if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
- return BO;
-
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
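
The FMA combines above now key on the per-node reassociation flag (or UnsafeFPMath) rather than UnsafeFPMath alone, because folds like (fma x, c1, (fmul x, c2)) -> (fmul x, (c1 + c2)) reassociate floating-point arithmetic, which is not value-preserving in general. The classic demonstration in plain C++:

    #include <cstdio>

    int main() {
      double a = 1e16, b = -1e16, c = 1.0;
      std::printf("%g\n", (a + b) + c); // prints 1
      std::printf("%g\n", a + (b + c)); // prints 0: b + c rounds back to -1e16
      return 0;
    }
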
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 441437351852..195c0e6a836f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2529,8 +2529,7 @@ bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
/// DemandedElts. We use this predicate to simplify operations downstream.
bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts,
unsigned Depth /* = 0 */) const {
- APInt Mask = APInt::getAllOnes(V.getScalarValueSizeInBits());
- return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+ return computeKnownBits(V, DemandedElts, Depth).isZero();
}
/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
@@ -9089,6 +9088,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
}
break;
}
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!");
+ assert(VTList.VTs[0].isInteger() && VTList.VTs[0] == VTList.VTs[1] &&
+ VTList.VTs[0] == Ops[0].getValueType() &&
+ VTList.VTs[0] == Ops[1].getValueType() &&
+ "Binary operator types must match!");
+ break;
+ }
case ISD::STRICT_FP_EXTEND:
assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
"Invalid STRICT_FP_EXTEND!");
@@ -11682,6 +11690,35 @@ bool BuildVectorSDNode::isConstant() const {
return true;
}
+Optional<std::pair<APInt, APInt>>
+BuildVectorSDNode::isConstantSequence() const {
+ unsigned NumOps = getNumOperands();
+ if (NumOps < 2)
+ return None;
+
+ if (!isa<ConstantSDNode>(getOperand(0)) ||
+ !isa<ConstantSDNode>(getOperand(1)))
+ return None;
+
+ unsigned EltSize = getValueType(0).getScalarSizeInBits();
+ APInt Start = getConstantOperandAPInt(0).trunc(EltSize);
+ APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start;
+
+ if (Stride.isZero())
+ return None;
+
+ for (unsigned i = 2; i < NumOps; ++i) {
+ if (!isa<ConstantSDNode>(getOperand(i)))
+ return None;
+
+ APInt Val = getConstantOperandAPInt(i).trunc(EltSize);
+ if (Val != (Start + (Stride * i)))
+ return None;
+ }
+
+ return std::make_pair(Start, Stride);
+}
+
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Find the first non-undef value in the shuffle mask.
unsigned i, e;
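
The new BuildVectorSDNode::isConstantSequence recognizes build_vectors whose elements form an arithmetic progression start + i*stride with a nonzero stride, which targets that can materialize strided vectors cheaply may exploit. A standalone sketch of the check over plain integers:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <utility>
    #include <vector>

    std::optional<std::pair<int64_t, int64_t>>
    constantSequence(const std::vector<int64_t> &Elts) {
      if (Elts.size() < 2)
        return std::nullopt;
      int64_t Start = Elts[0];
      int64_t Stride = Elts[1] - Start;
      if (Stride == 0)
        return std::nullopt;
      for (std::size_t I = 2; I < Elts.size(); ++I)
        if (Elts[I] != Start + Stride * static_cast<int64_t>(I))
          return std::nullopt;
      return std::make_pair(Start, Stride); // e.g. {3, 5, 7, 9} -> (3, 2)
    }
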
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 4a3ab00614b3..d1915fd4e7ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -198,7 +198,7 @@ public:
SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo)
: SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {}
- virtual void addSuccessorWithProb(
+ void addSuccessorWithProb(
MachineBasicBlock *Src, MachineBasicBlock *Dst,
BranchProbability Prob = BranchProbability::getUnknown()) override {
SDB->addSuccessorWithProb(Src, Dst, Prob);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cd4f0ae42bcd..6205e74837c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -654,6 +654,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Pretend we don't know anything about scalable vectors for now.
+ // TODO: We can probably do more work on simplifying the operations for
+ // scalable vectors, but for now we just bail out.
+ if (VT.isScalableVector())
+ return SDValue();
+
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
@@ -664,7 +672,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
// Not demanding any bits/elts from Op.
if (DemandedBits == 0 || DemandedElts == 0)
- return DAG.getUNDEF(Op.getValueType());
+ return DAG.getUNDEF(VT);
bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned NumElts = DemandedElts.getBitWidth();
@@ -894,6 +902,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
+
+ // Pretend we don't know anything about scalable vectors for now.
+ // TODO: We can probably do more work on simplifying the operations for
+ // scalable vectors, but for now we just bail out.
+ if (VT.isScalableVector())
+ return SDValue();
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 62b7f629f403..3e14edb5f730 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -2343,7 +2343,7 @@ void DWARFLinker::addObjectFile(DWARFFile &File) {
updateAccelKind(*ObjectContexts.back().File.Dwarf);
}
-bool DWARFLinker::link() {
+Error DWARFLinker::link() {
assert(Options.NoOutput || TheDwarfEmitter);
// A unique ID that identifies each compile unit.
@@ -2410,6 +2410,55 @@ bool DWARFLinker::link() {
if (!OptContext.File.Dwarf)
continue;
+    // Check whether type units are present.
+ if (!OptContext.File.Dwarf->types_section_units().empty()) {
+ reportWarning("type units are not currently supported: file will "
+ "be skipped",
+ OptContext.File);
+ OptContext.Skip = true;
+ continue;
+ }
+
+    // Check for unsupported sections. The following sections can be
+    // referenced from the .debug_info section. The current DWARFLinker
+    // implementation does not support or update references to these tables,
+    // so we report a warning and skip the corresponding object file.
+ if (!OptContext.File.Dwarf->getDWARFObj()
+ .getRnglistsSection()
+ .Data.empty()) {
+ reportWarning("'.debug_rnglists' is not currently supported: file "
+ "will be skipped",
+ OptContext.File);
+ OptContext.Skip = true;
+ continue;
+ }
+
+ if (!OptContext.File.Dwarf->getDWARFObj()
+ .getLoclistsSection()
+ .Data.empty()) {
+ reportWarning("'.debug_loclists' is not currently supported: file "
+ "will be skipped",
+ OptContext.File);
+ OptContext.Skip = true;
+ continue;
+ }
+
+ if (!OptContext.File.Dwarf->getDWARFObj().getMacroSection().Data.empty()) {
+ reportWarning("'.debug_macro' is not currently supported: file "
+ "will be skipped",
+ OptContext.File);
+ OptContext.Skip = true;
+ continue;
+ }
+
+ if (OptContext.File.Dwarf->getDWARFObj().getMacinfoSection().size() > 1) {
+ reportWarning("'.debug_macinfo' is not currently supported: file "
+ "will be skipped",
+ OptContext.File);
+ OptContext.Skip = true;
+ continue;
+ }
+
// In a first phase, just read in the debug info and load all clang modules.
OptContext.CompileUnits.reserve(
OptContext.File.Dwarf->getNumCompileUnits());
@@ -2660,7 +2709,7 @@ bool DWARFLinker::link() {
"---------------\n\n";
}
- return true;
+ return Error::success();
}
bool DWARFLinker::verify(const DWARFFile &File) {
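
Changing link() from bool to llvm::Error changes the caller contract: Error values must be consumed, so callers have to handle or log a failure explicitly. A sketch of caller-side handling (doLink is an illustrative stand-in for link()):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static Error doLink() { return Error::success(); } // stand-in for link()

    static bool runLink() {
      if (Error E = doLink()) {
        logAllUnhandledErrors(std::move(E), errs(), "DWARFLinker: ");
        return false; // failure path: the Error is consumed by the logger
      }
      return true; // success: the bool test marked the Error as checked
    }
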
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index 44e39c019e0c..346f4dfd290d 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -18,6 +18,7 @@
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <limits>
using namespace llvm;
using namespace llvm::object;
@@ -654,6 +655,12 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
IndexVersion)];
C.Offset = InfoSectionOffset;
C.Length = Header.Length + 4;
+
+ if (std::numeric_limits<uint32_t>::max() - InfoSectionOffset <
+ C.Length)
+ return make_error<DWPError>(
+ "debug information section offset is greater than 4GB");
+
UnitOffset += C.Length;
if (Header.Version < 5 ||
Header.UnitType == dwarf::DW_UT_split_compile) {
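
The guard is written so the check itself cannot wrap: rather than computing InfoSectionOffset + C.Length, which can overflow uint32_t, it compares the length against the remaining headroom below UINT32_MAX. Minimal sketch (exceeds4GB is an illustrative name):

    #include <cstdint>
    #include <limits>

    // True when Offset + Length would exceed UINT32_MAX; the subtraction
    // cannot underflow because max >= Offset always holds.
    bool exceeds4GB(uint32_t Offset, uint32_t Length) {
      return std::numeric_limits<uint32_t>::max() - Offset < Length;
    }
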
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
index dc07eaeaf615..3a6162db75c4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
@@ -18,13 +18,19 @@ static const char *CommonSectionName = "__common";
namespace llvm {
namespace jitlink {
+static Triple createTripleWithCOFFFormat(Triple T) {
+ T.setObjectFormat(Triple::COFF);
+ return T;
+}
+
COFFLinkGraphBuilder::COFFLinkGraphBuilder(
const object::COFFObjectFile &Obj, Triple TT,
LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
: Obj(Obj),
- G(std::make_unique<LinkGraph>(
- Obj.getFileName().str(), Triple(std::move(TT)), getPointerSize(Obj),
- getEndianness(Obj), std::move(GetEdgeKindName))) {
+ G(std::make_unique<LinkGraph>(Obj.getFileName().str(),
+ createTripleWithCOFFFormat(TT),
+ getPointerSize(Obj), getEndianness(Obj),
+ std::move(GetEdgeKindName))) {
LLVM_DEBUG({
dbgs() << "Created COFFLinkGraphBuilder for \"" << Obj.getFileName()
<< "\"\n";
@@ -128,16 +134,6 @@ Error COFFLinkGraphBuilder::graphifySections() {
if (Expected<StringRef> SecNameOrErr = Obj.getSectionName(*Sec))
SectionName = *SecNameOrErr;
- bool IsDiscardable =
- (*Sec)->Characteristics &
- (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_LNK_INFO);
- if (IsDiscardable) {
- LLVM_DEBUG(dbgs() << " " << SecIndex << ": \"" << SectionName
- << "\" is discardable: "
- "No graph section will be created.\n");
- continue;
- }
-
// FIXME: Skip debug info sections
LLVM_DEBUG({
@@ -145,6 +141,8 @@ Error COFFLinkGraphBuilder::graphifySections() {
<< "Creating section for \"" << SectionName << "\"\n";
});
+ // FIXME: Revisit crash when dropping IMAGE_SCN_MEM_DISCARDABLE sections
+
// Get the section's memory protection flags.
MemProt Prot = MemProt::None;
if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_EXECUTE)
@@ -190,6 +188,7 @@ Error COFFLinkGraphBuilder::graphifySymbols() {
LLVM_DEBUG(dbgs() << " Creating graph symbols...\n");
SymbolSets.resize(Obj.getNumberOfSections() + 1);
+ PendingComdatExports.resize(Obj.getNumberOfSections() + 1);
GraphSymbols.resize(Obj.getNumberOfSymbols());
for (COFFSymbolIndex SymIndex = 0;
@@ -232,18 +231,16 @@ Error COFFLinkGraphBuilder::graphifySymbols() {
<< getCOFFSectionName(SectionIndex, Sec, *Sym)
<< " (index: " << SectionIndex << ") \n";
});
- GSym =
- &G->addExternalSymbol(SymbolName, Sym->getValue(), Linkage::Strong);
+ if (!ExternalSymbols.count(SymbolName))
+ ExternalSymbols[SymbolName] =
+ &G->addExternalSymbol(SymbolName, Sym->getValue(), Linkage::Strong);
+ GSym = ExternalSymbols[SymbolName];
} else if (Sym->isWeakExternal()) {
- COFFSymbolIndex TagIndex =
- Sym->getAux<object::coff_aux_weak_external>()->TagIndex;
- assert(Sym->getAux<object::coff_aux_weak_external>()->Characteristics !=
- COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY &&
- "IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY is not supported.");
- assert(Sym->getAux<object::coff_aux_weak_external>()->Characteristics !=
- COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY &&
- "IMAGE_WEAK_EXTERN_SEARCH_LIBRARY is not supported.");
- WeakAliasRequests.push_back({SymIndex, TagIndex, SymbolName});
+ auto *WeakExternal = Sym->getAux<object::coff_aux_weak_external>();
+ COFFSymbolIndex TagIndex = WeakExternal->TagIndex;
+ uint32_t Characteristics = WeakExternal->Characteristics;
+ WeakExternalRequests.push_back(
+ {SymIndex, TagIndex, Characteristics, SymbolName});
} else {
Expected<jitlink::Symbol *> NewGSym =
createDefinedSymbol(SymIndex, SymbolName, *Sym, Sec);
@@ -279,35 +276,41 @@ Error COFFLinkGraphBuilder::graphifySymbols() {
Error COFFLinkGraphBuilder::flushWeakAliasRequests() {
   // Export the weak external symbols and alias them.
- for (auto &WeakAlias : WeakAliasRequests) {
- if (auto *Target = getGraphSymbol(WeakAlias.Target)) {
+ for (auto &WeakExternal : WeakExternalRequests) {
+ if (auto *Target = getGraphSymbol(WeakExternal.Target)) {
Expected<object::COFFSymbolRef> AliasSymbol =
- Obj.getSymbol(WeakAlias.Alias);
+ Obj.getSymbol(WeakExternal.Alias);
if (!AliasSymbol)
return AliasSymbol.takeError();
+ // FIXME: IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY and
+ // IMAGE_WEAK_EXTERN_SEARCH_LIBRARY are handled in the same way.
+ Scope S =
+ WeakExternal.Characteristics == COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS
+ ? Scope::Default
+ : Scope::Local;
+
+      // FIXME: Support this once there is a way to handle it.
if (!Target->isDefined())
return make_error<JITLinkError>("Weak external symbol with external "
"symbol as alternative not supported.");
jitlink::Symbol *NewSymbol = &G->addDefinedSymbol(
- Target->getBlock(), Target->getOffset(), WeakAlias.SymbolName,
- Target->getSize(), Linkage::Weak, Scope::Default,
- Target->isCallable(), false);
- setGraphSymbol(AliasSymbol->getSectionNumber(), WeakAlias.Alias,
+ Target->getBlock(), Target->getOffset(), WeakExternal.SymbolName,
+ Target->getSize(), Linkage::Weak, S, Target->isCallable(), false);
+ setGraphSymbol(AliasSymbol->getSectionNumber(), WeakExternal.Alias,
*NewSymbol);
LLVM_DEBUG({
- dbgs() << " " << WeakAlias.Alias
+ dbgs() << " " << WeakExternal.Alias
<< ": Creating weak external symbol for COFF symbol \""
- << WeakAlias.SymbolName << "\" in section "
+ << WeakExternal.SymbolName << "\" in section "
<< AliasSymbol->getSectionNumber() << "\n";
dbgs() << " " << *NewSymbol << "\n";
});
} else
return make_error<JITLinkError>("Weak symbol alias requested but actual "
"symbol not found for symbol " +
- formatv("{0:d}", WeakAlias.Alias));
+ formatv("{0:d}", WeakExternal.Alias));
}
return Error::success();
}
@@ -324,6 +327,8 @@ Error COFFLinkGraphBuilder::calculateImplicitSizeOfSymbols() {
SecIndex <= static_cast<COFFSectionIndex>(Obj.getNumberOfSections());
SecIndex++) {
auto &SymbolSet = SymbolSets[SecIndex];
+ if (SymbolSet.empty())
+ continue;
jitlink::Block *B = getGraphBlock(SecIndex);
orc::ExecutorAddrDiff LastOffset = B->getSize();
orc::ExecutorAddrDiff LastDifferentOffset = B->getSize();
@@ -394,25 +399,35 @@ Expected<Symbol *> COFFLinkGraphBuilder::createDefinedSymbol(
formatv("{0:d}", SymIndex));
Block *B = getGraphBlock(Symbol.getSectionNumber());
+ if (!B) {
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex
+ << ": Skipping graph symbol since section was not created for "
+ "COFF symbol \""
+ << SymbolName << "\" in section " << Symbol.getSectionNumber()
+ << "\n";
+ });
+ return nullptr;
+ }
+
if (Symbol.isExternal()) {
     // This is not a COMDAT sequence; export the symbol as-is.
- if (!isComdatSection(Section))
+ if (!isComdatSection(Section)) {
return &G->addDefinedSymbol(
*B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Default,
Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
- else {
- if (!PendingComdatExport)
+ } else {
+ if (!PendingComdatExports[Symbol.getSectionNumber()])
return make_error<JITLinkError>("No pending COMDAT export for symbol " +
formatv("{0:d}", SymIndex));
- if (PendingComdatExport->SectionIndex != Symbol.getSectionNumber())
- return make_error<JITLinkError>(
- "COMDAT export section number mismatch for symbol " +
- formatv("{0:d}", SymIndex));
+
return exportCOMDATSymbol(SymIndex, SymbolName, Symbol);
}
}
- if (Symbol.getStorageClass() == COFF::IMAGE_SYM_CLASS_STATIC) {
+ if (Symbol.getStorageClass() == COFF::IMAGE_SYM_CLASS_STATIC ||
+ Symbol.getStorageClass() == COFF::IMAGE_SYM_CLASS_LABEL) {
const object::coff_aux_section_definition *Definition =
Symbol.getSectionDefinition();
if (!Definition || !isComdatSection(Section)) {
@@ -422,12 +437,14 @@ Expected<Symbol *> COFFLinkGraphBuilder::createDefinedSymbol(
Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
}
if (Definition->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
- // FIXME: don't dead strip this when parent section is alive
- return &G->addDefinedSymbol(
+ auto Target = Definition->getNumber(Symbol.isBigObj());
+ auto GSym = &G->addDefinedSymbol(
*B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Local,
Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
+ getGraphBlock(Target)->addEdge(Edge::KeepAlive, 0, *GSym, 0);
+ return GSym;
}
- if (PendingComdatExport)
+ if (PendingComdatExports[Symbol.getSectionNumber()])
return make_error<JITLinkError>(
"COMDAT export request already exists before symbol " +
formatv("{0:d}", SymIndex));
@@ -474,10 +491,16 @@ Expected<Symbol *> COFFLinkGraphBuilder::createCOMDATExportRequest(
break;
}
case COFF::IMAGE_COMDAT_SELECT_LARGEST: {
- // FIXME: Support IMAGE_COMDAT_SELECT_LARGEST when LinkGraph is able to
- // handle this.
- return make_error<JITLinkError>(
- "IMAGE_COMDAT_SELECT_LARGEST is not supported.");
+ // FIXME: Support IMAGE_COMDAT_SELECT_LARGEST properly when LinkGraph is
+ // able to handle this.
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex
+ << ": Partially supported IMAGE_COMDAT_SELECT_LARGEST was used"
+ " in section "
+ << Symbol.getSectionNumber() << "\n";
+ });
+ L = Linkage::Weak;
+ break;
}
case COFF::IMAGE_COMDAT_SELECT_NEWEST: {
// Even link.exe doesn't support this selection properly.
@@ -489,7 +512,7 @@ Expected<Symbol *> COFFLinkGraphBuilder::createCOMDATExportRequest(
formatv("{0:d}", Definition->Selection));
}
}
- PendingComdatExport = {SymIndex, Symbol.getSectionNumber(), L};
+ PendingComdatExports[Symbol.getSectionNumber()] = {SymIndex, L};
return &G->addAnonymousSymbol(*B, Symbol.getValue(), Definition->Length,
false, false);
}
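For context on the request/fulfil flow above: a COFF COMDAT section is described by a pair of symbol-table entries, sketched here with illustrative values:

  // Symbol table excerpt for a COMDAT function foo in .text$mn:
  //   [5] .text$mn  static, aux section definition with
  //                 Selection = IMAGE_COMDAT_SELECT_ANY
  //   [7] foo       external, defined in .text$mn
  // createCOMDATExportRequest() consumes entry [5] and records the pending
  // linkage; exportCOMDATSymbol() consumes entry [7] and creates the named
  // graph symbol with that linkage.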
@@ -499,6 +522,7 @@ Expected<Symbol *>
COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex,
StringRef SymbolName,
object::COFFSymbolRef Symbol) {
+ auto &PendingComdatExport = PendingComdatExports[Symbol.getSectionNumber()];
COFFSymbolIndex TargetIndex = PendingComdatExport->SymbolIndex;
Linkage L = PendingComdatExport->Linkage;
jitlink::Symbol *Target = getGraphSymbol(TargetIndex);
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
index 4dc1b14dc4a2..f925f6d7aeef 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -111,19 +111,19 @@ private:
// COMDAT sequence.
struct ComdatExportRequest {
COFFSymbolIndex SymbolIndex;
- COFFSectionIndex SectionIndex;
jitlink::Linkage Linkage;
};
- Optional<ComdatExportRequest> PendingComdatExport;
+ std::vector<Optional<ComdatExportRequest>> PendingComdatExports;
  // This represents a pending request to create a weak external symbol with
  // the given name.
- struct WeakAliasRequest {
+ struct WeakExternalRequest {
COFFSymbolIndex Alias;
COFFSymbolIndex Target;
+ uint32_t Characteristics;
StringRef SymbolName;
};
- std::vector<WeakAliasRequest> WeakAliasRequests;
+ std::vector<WeakExternalRequest> WeakExternalRequests;
// Per COFF section jitlink symbol set sorted by offset.
// Used for calculating implicit size of defined symbols.
@@ -162,6 +162,8 @@ private:
Section *CommonSection = nullptr;
std::vector<Block *> GraphBlocks;
std::vector<Symbol *> GraphSymbols;
+
+ DenseMap<StringRef, Symbol *> ExternalSymbols;
};
template <typename RelocHandlerFunction>
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
index 3d36ad1ed767..e2040dc95acc 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
@@ -12,8 +12,8 @@
#include "llvm/ExecutionEngine/JITLink/COFF_x86_64.h"
#include "COFFLinkGraphBuilder.h"
-#include "EHFrameSupportImpl.h"
#include "JITLinkGeneric.h"
+#include "SEHFrameSupport.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/Object/COFF.h"
@@ -26,6 +26,11 @@ using namespace llvm::jitlink;
namespace {
+enum EdgeKind_coff_x86_64 : Edge::Kind {
+ PCRel32 = x86_64::FirstPlatformRelocation,
+ Pointer32NB,
+};
+
class COFFJITLinker_x86_64 : public JITLinker<COFFJITLinker_x86_64> {
friend class JITLinker<COFFJITLinker_x86_64>;
@@ -43,27 +48,7 @@ private:
class COFFLinkGraphBuilder_x86_64 : public COFFLinkGraphBuilder {
private:
- uint64_t ImageBase = 0;
- enum COFFX86RelocationKind {
- COFFAddr32NB,
- COFFRel32,
- };
-
- static Expected<COFFX86RelocationKind>
- getRelocationKind(const uint32_t Type) {
- switch (Type) {
- case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_ADDR32NB:
- return COFFAddr32NB;
- case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_REL32:
- return COFFRel32;
- }
-
- return make_error<JITLinkError>("Unsupported x86_64 relocation:" +
- formatv("{0:d}", Type));
- }
-
Error addRelocations() override {
-
LLVM_DEBUG(dbgs() << "Processing relocations:\n");
for (const auto &RelSect : sections())
@@ -74,21 +59,9 @@ private:
return Error::success();
}
- uint64_t getImageBase() {
- if (!ImageBase) {
- ImageBase = std::numeric_limits<uint64_t>::max();
- for (const auto &Block : getGraph().blocks()) {
- if (Block->getAddress().getValue())
- ImageBase = std::min(ImageBase, Block->getAddress().getValue());
- }
- }
- return ImageBase;
- }
-
Error addSingleRelocation(const object::RelocationRef &Rel,
const object::SectionRef &FixupSect,
Block &BlockToFix) {
-
const object::coff_relocation *COFFRel = getObject().getCOFFRelocation(Rel);
auto SymbolIt = Rel.getSymbol();
if (SymbolIt == getObject().symbol_end()) {
@@ -110,62 +83,122 @@ private:
SymIndex, FixupSect.getIndex()),
inconvertibleErrorCode());
- Expected<COFFX86RelocationKind> RelocKind =
- getRelocationKind(Rel.getType());
- if (!RelocKind)
- return RelocKind.takeError();
-
int64_t Addend = 0;
orc::ExecutorAddr FixupAddress =
orc::ExecutorAddr(FixupSect.getAddress()) + Rel.getOffset();
Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
Edge::Kind Kind = Edge::Invalid;
+ const char *FixupPtr = BlockToFix.getContent().data() + Offset;
- switch (*RelocKind) {
- case COFFAddr32NB: {
- Kind = x86_64::Pointer32;
- Offset -= getImageBase();
+ switch (Rel.getType()) {
+ case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_ADDR32NB: {
+ Kind = EdgeKind_coff_x86_64::Pointer32NB;
+ Addend = *reinterpret_cast<const support::little32_t *>(FixupPtr);
break;
}
- case COFFRel32: {
- Kind = x86_64::BranchPCRel32;
+ case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_REL32: {
+ Kind = EdgeKind_coff_x86_64::PCRel32;
+ Addend = *reinterpret_cast<const support::little32_t *>(FixupPtr);
break;
}
+ case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_REL32_1: {
+ Kind = EdgeKind_coff_x86_64::PCRel32;
+ Addend = *reinterpret_cast<const support::little32_t *>(FixupPtr);
+ Addend -= 1;
+ break;
+ }
+ default: {
+ return make_error<JITLinkError>("Unsupported x86_64 relocation:" +
+ formatv("{0:d}", Rel.getType()));
+ }
};
Edge GE(Kind, Offset, *GraphSymbol, Addend);
LLVM_DEBUG({
dbgs() << " ";
- printEdge(dbgs(), BlockToFix, GE, x86_64::getEdgeKindName(Kind));
+ printEdge(dbgs(), BlockToFix, GE, getCOFFX86RelocationKindName(Kind));
dbgs() << "\n";
});
BlockToFix.addEdge(std::move(GE));
+
return Error::success();
}
- /// Return the string name of the given COFF x86_64 edge kind.
- const char *getCOFFX86RelocationKindName(COFFX86RelocationKind R) {
- switch (R) {
- case COFFAddr32NB:
- return "COFFAddr32NB";
- case COFFRel32:
- return "COFFRel32";
+public:
+ COFFLinkGraphBuilder_x86_64(const object::COFFObjectFile &Obj, const Triple T)
+ : COFFLinkGraphBuilder(Obj, std::move(T), getCOFFX86RelocationKindName) {}
+};
+
+class COFFLinkGraphLowering_x86_64 {
+public:
+ // Lowers COFF x86_64 specific edges to generic x86_64 edges.
+ Error lowerCOFFRelocationEdges(LinkGraph &G, JITLinkContext &Ctx) {
+ for (auto *B : G.blocks()) {
+ for (auto &E : B->edges()) {
+ switch (E.getKind()) {
+ case EdgeKind_coff_x86_64::Pointer32NB: {
+ auto ImageBase = getImageBaseAddress(G, Ctx);
+ if (!ImageBase)
+ return ImageBase.takeError();
+ E.setAddend(E.getAddend() - *ImageBase);
+ E.setKind(x86_64::Pointer32);
+ break;
+ }
+ case EdgeKind_coff_x86_64::PCRel32: {
+ E.setKind(x86_64::PCRel32);
+ break;
+ }
+ default:
+ break;
+ }
+ }
}
+ return Error::success();
}
-public:
- COFFLinkGraphBuilder_x86_64(const object::COFFObjectFile &Obj, const Triple T)
- : COFFLinkGraphBuilder(Obj, std::move(T), x86_64::getEdgeKindName) {}
+private:
+ static StringRef getImageBaseSymbolName() { return "__ImageBase"; }
+ Expected<JITTargetAddress> getImageBaseAddress(LinkGraph &G,
+ JITLinkContext &Ctx) {
+ if (this->ImageBase)
+ return this->ImageBase;
+ for (auto *S : G.defined_symbols())
+ if (S->getName() == getImageBaseSymbolName()) {
+ this->ImageBase = S->getAddress().getValue();
+ return this->ImageBase;
+ }
+
+ JITLinkContext::LookupMap Symbols;
+ Symbols[getImageBaseSymbolName()] = SymbolLookupFlags::RequiredSymbol;
+ JITTargetAddress ImageBase;
+ Error Err = Error::success();
+ Ctx.lookup(Symbols,
+ createLookupContinuation([&](Expected<AsyncLookupResult> LR) {
+ ErrorAsOutParameter EAO(&Err);
+ if (!LR) {
+ Err = LR.takeError();
+ return;
+ }
+ auto &ImageBaseSymbol = LR->begin()->second;
+ ImageBase = ImageBaseSymbol.getAddress();
+ }));
+ if (Err)
+ return std::move(Err);
+ this->ImageBase = ImageBase;
+ return ImageBase;
+ }
+ JITTargetAddress ImageBase = 0;
};
-Error buildTables_COFF_x86_64(LinkGraph &G) {
- LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+Error lowerEdges_COFF_x86_64(LinkGraph &G, JITLinkContext *Ctx) {
+ LLVM_DEBUG(dbgs() << "Lowering COFF x86_64 edges:\n");
+ COFFLinkGraphLowering_x86_64 GraphLowering;
+
+ if (auto Err = GraphLowering.lowerCOFFRelocationEdges(G, *Ctx))
+ return Err;
- x86_64::GOTTableManager GOT;
- x86_64::PLTTableManager PLT(GOT);
- visitExistingEdges(G, GOT, PLT);
return Error::success();
}
} // namespace
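A worked example of the Pointer32NB lowering above (addresses hypothetical): IMAGE_REL_AMD64_ADDR32NB stores a 32-bit offset relative to __ImageBase, so folding the image base into the addend lets the fixup be applied as an ordinary Pointer32.

  // Target symbol address:  0x140003010
  // __ImageBase:            0x140000000
  // Stored 32-bit value:    Target + Addend - ImageBase = 0x3010 (an RVA)
  // After lowering: Kind = x86_64::Pointer32, Addend' = Addend - ImageBase,
  // so the generic fixup writes Target + Addend' as a 32-bit value.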
@@ -173,6 +206,18 @@ Error buildTables_COFF_x86_64(LinkGraph &G) {
namespace llvm {
namespace jitlink {
+/// Return the string name of the given COFF x86_64 edge kind.
+const char *getCOFFX86RelocationKindName(Edge::Kind R) {
+ switch (R) {
+ case PCRel32:
+ return "PCRel32";
+ case Pointer32NB:
+ return "Pointer32NB";
+ default:
+ return x86_64::getEdgeKindName(R);
+ }
+}
+
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromCOFFObject_x86_64(MemoryBufferRef ObjectBuffer) {
LLVM_DEBUG({
@@ -194,16 +239,16 @@ void link_COFF_x86_64(std::unique_ptr<LinkGraph> G,
const Triple &TT = G->getTargetTriple();
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
// Add a mark-live pass.
- if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ if (auto MarkLive = Ctx->getMarkLivePass(TT)) {
Config.PrePrunePasses.push_back(std::move(MarkLive));
- else
+ Config.PrePrunePasses.push_back(SEHFrameKeepAlivePass(".pdata"));
+ } else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
- // Add an in-place GOT/Stubs/TLSInfoEntry build pass.
- Config.PostPrunePasses.push_back(buildTables_COFF_x86_64);
-
- // Add GOT/Stubs optimizer pass.
- Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
+ // Add COFF edge lowering passes.
+ JITLinkContext *CtxPtr = Ctx.get();
+ Config.PreFixupPasses.push_back(
+ [CtxPtr](LinkGraph &G) { return lowerEdges_COFF_x86_64(G, CtxPtr); });
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
diff --git a/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h b/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h
new file mode 100644
index 000000000000..f7689e4e4043
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h
@@ -0,0 +1,61 @@
+//===------- SEHFrameSupport.h - JITLink seh-frame utils --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// SEHFrame utils for JITLink.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_SEHFRAMESUPPORT_H
+#define LLVM_EXECUTIONENGINE_JITLINK_SEHFRAMESUPPORT_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace jitlink {
+/// This pass adds keep-alive edges from SEH frame sections
+/// to the parent function content blocks.
+class SEHFrameKeepAlivePass {
+public:
+ SEHFrameKeepAlivePass(StringRef SEHFrameSectionName)
+ : SEHFrameSectionName(SEHFrameSectionName) {}
+
+ Error operator()(LinkGraph &G) {
+ auto *S = G.findSectionByName(SEHFrameSectionName);
+ if (!S)
+ return Error::success();
+
+    // Simply consider every block pointed to by the SEH frame block as a
+    // parent. This adds some unnecessary keep-alive edges to unwind info
+    // (xdata) blocks, but those blocks are usually dead by default, so they
+    // don't affect the fate of the SEH frame block.
+ for (auto *B : S->blocks()) {
+ auto &DummySymbol = G.addAnonymousSymbol(*B, 0, 0, false, false);
+ DenseSet<Block *> Children;
+ for (auto &E : B->edges()) {
+ auto &Sym = E.getTarget();
+ if (!Sym.isDefined())
+ continue;
+ Children.insert(&Sym.getBlock());
+ }
+ for (auto *Child : Children)
+ Child->addEdge(Edge(Edge::KeepAlive, 0, DummySymbol, 0));
+ }
+ return Error::success();
+ }
+
+private:
+ StringRef SEHFrameSectionName;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_SEHFRAMESUPPORT_H
\ No newline at end of file
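Schematically, the effect of SEHFrameKeepAlivePass on the graph is the following (block names are illustrative):

  //   .pdata block P --(existing edges)--> function block F, xdata block X
  //
  // The pass adds an anonymous symbol A anchored on P, then edges
  //   F --KeepAlive--> A   and   X --KeepAlive--> A
  // so whenever F survives dead-stripping, its unwind registration P does too.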
diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
index df9979b47e88..393250a5578b 100644
--- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
@@ -36,6 +36,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "NegDelta32";
case Delta64FromGOT:
return "Delta64FromGOT";
+ case PCRel32:
+ return "PCRel32";
case BranchPCRel32:
return "BranchPCRel32";
case BranchPCRel32ToPtrJumpStub:
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
index 356b81b4f1c5..3de15db3f1c6 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
@@ -150,16 +150,39 @@ static Expected<MaterializationUnit::Interface>
getCOFFObjectFileSymbolInfo(ExecutionSession &ES,
const object::COFFObjectFile &Obj) {
MaterializationUnit::Interface I;
-
+ std::vector<Optional<object::coff_aux_section_definition>> ComdatDefs(
+ Obj.getNumberOfSections() + 1);
for (auto &Sym : Obj.symbols()) {
Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
if (!SymFlagsOrErr)
// TODO: Test this error.
return SymFlagsOrErr.takeError();
- // Skip symbols not defined in this object file.
- if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
- continue;
+ // Handle comdat symbols
+ auto COFFSym = Obj.getCOFFSymbol(Sym);
+ bool IsWeak = false;
+ if (auto *Def = COFFSym.getSectionDefinition()) {
+ auto Sec = Obj.getSection(COFFSym.getSectionNumber());
+ if (!Sec)
+ return Sec.takeError();
+ if (((*Sec)->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT) &&
+ Def->Selection != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ ComdatDefs[COFFSym.getSectionNumber()] = *Def;
+ continue;
+ }
+ }
+ if (!COFF::isReservedSectionNumber(COFFSym.getSectionNumber()) &&
+ ComdatDefs[COFFSym.getSectionNumber()]) {
+ auto Def = ComdatDefs[COFFSym.getSectionNumber()];
+ if (Def->Selection != COFF::IMAGE_COMDAT_SELECT_NODUPLICATES) {
+ IsWeak = true;
+ }
+ ComdatDefs[COFFSym.getSectionNumber()] = None;
+ } else {
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+ }
// Skip symbols that are not global.
if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
@@ -180,12 +203,13 @@ getCOFFObjectFileSymbolInfo(ExecutionSession &ES,
if (!SymFlags)
return SymFlags.takeError();
*SymFlags |= JITSymbolFlags::Exported;
- auto COFFSym = Obj.getCOFFSymbol(Sym);
// Weak external is always a function
- if (COFFSym.isWeakExternal()) {
+ if (COFFSym.isWeakExternal())
*SymFlags |= JITSymbolFlags::Callable;
- }
+
+ if (IsWeak)
+ *SymFlags |= JITSymbolFlags::Weak;
I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags);
}
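The loop above leans on COFF symbol-table ordering: the COMDAT section symbol (carrying the aux section definition) precedes the defined symbols of that section, so the definition can be parked per section number and consumed by the next symbol seen there. A sketch of the per-section lifecycle (names as in the code above):

  // ComdatDefs[SecNo] lifecycle:
  //   section symbol with aux definition -> ComdatDefs[SecNo] = *Def; continue
  //   next symbol in SecNo               -> IsWeak unless Selection is
  //                                         IMAGE_COMDAT_SELECT_NODUPLICATES;
  //                                         then ComdatDefs[SecNo] = None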
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index bf13b6c325ec..5d4cfceefb3e 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -1424,6 +1424,8 @@ void Pattern::printVariableDefs(const SourceMgr &SM,
// Sort variable captures by the order in which they matched the input.
// Ranges shouldn't be overlapping, so we can just compare the start.
llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) {
+ if (&A == &B)
+ return false;
assert(A.Range.Start != B.Range.Start &&
"unexpected overlapping variable captures");
return A.Range.Start.getPointer() < B.Range.Start.getPointer();
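Background for the new guard: with expensive checks enabled, llvm::sort may invoke the comparator with the same element on both sides, and a strict weak ordering requires comp(x, x) to be false; without the early return the non-overlap assertion would fire on that self-comparison. A minimal analogue (hypothetical element type):

  // A comparator must be irreflexive: Comp(X, X) == false.
  auto Comp = [](const int &A, const int &B) {
    if (&A == &B)
      return false; // self-comparison is never "less than"
    return A < B;
  };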
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 26171f537244..f5039eb5126c 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1627,6 +1627,10 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
Align Alignment, AtomicOrdering Ordering,
SyncScope::ID SSID) {
+ assert(Ordering != AtomicOrdering::NotAtomic &&
+ "atomicrmw instructions can only be atomic.");
+ assert(Ordering != AtomicOrdering::Unordered &&
+ "atomicrmw instructions cannot be unordered.");
Op<0>() = Ptr;
Op<1>() = Val;
setOperation(Operation);
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index c50d6901c9da..8ca75f58e403 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -32,6 +32,39 @@
using namespace llvm;
+bool IntrinsicInst::mayLowerToFunctionCall(Intrinsic::ID IID) {
+ switch (IID) {
+ case Intrinsic::objc_autorelease:
+ case Intrinsic::objc_autoreleasePoolPop:
+ case Intrinsic::objc_autoreleasePoolPush:
+ case Intrinsic::objc_autoreleaseReturnValue:
+ case Intrinsic::objc_copyWeak:
+ case Intrinsic::objc_destroyWeak:
+ case Intrinsic::objc_initWeak:
+ case Intrinsic::objc_loadWeak:
+ case Intrinsic::objc_loadWeakRetained:
+ case Intrinsic::objc_moveWeak:
+ case Intrinsic::objc_release:
+ case Intrinsic::objc_retain:
+ case Intrinsic::objc_retainAutorelease:
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ case Intrinsic::objc_retainBlock:
+ case Intrinsic::objc_storeStrong:
+ case Intrinsic::objc_storeWeak:
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ case Intrinsic::objc_retainedObject:
+ case Intrinsic::objc_unretainedObject:
+ case Intrinsic::objc_unretainedPointer:
+ case Intrinsic::objc_retain_autorelease:
+ case Intrinsic::objc_sync_enter:
+ case Intrinsic::objc_sync_exit:
+ return true;
+ default:
+ return false;
+ }
+}
+
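A hedged usage sketch (the caller code here is hypothetical): passes can consult this predicate to treat such intrinsics, e.g. the ObjC ARC entry points listed above that lower to runtime calls, like real call sites.

  // (I is a hypothetical llvm::Instruction visited by some pass.)
  if (const auto *II = dyn_cast<IntrinsicInst>(&I))
    if (IntrinsicInst::mayLowerToFunctionCall(II->getIntrinsicID()))
      ; // conservatively treat as an opaque call for CFG/ABI purposes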
//===----------------------------------------------------------------------===//
/// DbgVariableIntrinsic - This is the common base class for debug info
/// intrinsics for variables.
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 0ca40a675fe4..3e82987801c7 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -105,11 +105,13 @@ uint64_t ModuleSummaryIndex::getFlags() const {
Flags |= 0x20;
if (withDSOLocalPropagation())
Flags |= 0x40;
+ if (withWholeProgramVisibility())
+ Flags |= 0x80;
return Flags;
}
void ModuleSummaryIndex::setFlags(uint64_t Flags) {
- assert(Flags <= 0x7f && "Unexpected bits in flag");
+ assert(Flags <= 0xff && "Unexpected bits in flag");
// 1 bit: WithGlobalValueDeadStripping flag.
// Set on combined index only.
if (Flags & 0x1)
@@ -139,6 +141,10 @@ void ModuleSummaryIndex::setFlags(uint64_t Flags) {
// Set on combined index only.
if (Flags & 0x40)
setWithDSOLocalPropagation();
+ // 1 bit: WithWholeProgramVisibility flag.
+ // Set on combined index only.
+ if (Flags & 0x80)
+ setWithWholeProgramVisibility();
}
 // Collect for the given module the list of functions it defines
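A round-trip sketch of the new bit (Index and Other are hypothetical combined ModuleSummaryIndex instances):

  Index.setWithWholeProgramVisibility();
  uint64_t F = Index.getFlags(); // F & 0x80 is now set
  Other.setFlags(F);             // re-derives withWholeProgramVisibility()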
diff --git a/llvm/lib/IR/PrintPasses.cpp b/llvm/lib/IR/PrintPasses.cpp
index 83b8c93e766f..fe2da5ca114f 100644
--- a/llvm/lib/IR/PrintPasses.cpp
+++ b/llvm/lib/IR/PrintPasses.cpp
@@ -29,6 +29,50 @@ static cl::opt<bool> PrintAfterAll("print-after-all",
llvm::cl::desc("Print IR after each pass"),
cl::init(false), cl::Hidden);
+// Print out the IR after passes, similar to -print-after-all except that it
+// only prints the IR after passes that change the IR. Those passes that do not
+// make changes to the IR are reported as not making any changes. In addition,
+// the initial IR is also reported. Other hidden options affect the output from
+// this option. -filter-passes will limit the output to the named passes that
+// actually change the IR and other passes are reported as filtered out. The
+// specified passes will either be reported as making no changes (with no IR
+// reported) or the changed IR will be reported. Also, the -filter-print-funcs
+// and -print-module-scope options will do similar filtering based on function
+// name, reporting changed IRs as functions (or modules if -print-module-scope
+// is specified) for a particular function or indicating that the IR has been
+// filtered out. The extra options can be combined, allowing only changed IRs
+// for certain passes on certain functions to be reported in different formats,
+// with the rest being reported as filtered out. The -print-before-changed
+// option will print the IR as it was before each pass that changed it. The
+// optional value of quiet will only report when the IR changes, suppressing all
+// other messages, including the initial IR. The values "diff" and "diff-quiet"
+// will present the changes in a form similar to a patch, in either verbose or
+// quiet mode, respectively. The lines that are removed and added are prefixed
+// with '-' and '+', respectively. The -filter-print-funcs and -filter-passes
+// can be used to filter the output. This reporter relies on the Linux diff
+// utility to do comparisons and insert the prefixes. For systems that do not
+// have the necessary facilities, the error message will be shown in place of
+// the expected output.
+cl::opt<ChangePrinter> llvm::PrintChanged(
+ "print-changed", cl::desc("Print changed IRs"), cl::Hidden,
+ cl::ValueOptional, cl::init(ChangePrinter::None),
+ cl::values(
+ clEnumValN(ChangePrinter::Quiet, "quiet", "Run in quiet mode"),
+ clEnumValN(ChangePrinter::DiffVerbose, "diff",
+ "Display patch-like changes"),
+ clEnumValN(ChangePrinter::DiffQuiet, "diff-quiet",
+ "Display patch-like changes in quiet mode"),
+ clEnumValN(ChangePrinter::ColourDiffVerbose, "cdiff",
+ "Display patch-like changes with color"),
+ clEnumValN(ChangePrinter::ColourDiffQuiet, "cdiff-quiet",
+ "Display patch-like changes in quiet mode with color"),
+ clEnumValN(ChangePrinter::DotCfgVerbose, "dot-cfg",
+ "Create a website with graphical changes"),
+ clEnumValN(ChangePrinter::DotCfgQuiet, "dot-cfg-quiet",
+ "Create a website with graphical changes in quiet mode"),
+ // Sentinel value for unspecified option.
+ clEnumValN(ChangePrinter::Verbose, "", "")));
+
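For example, an invocation along the lines of

  opt -S -passes=instcombine -print-changed=diff-quiet test.ll

prints a patch-style diff only when a pass actually changes the IR, provided a diff utility is available on the system.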
static cl::opt<bool>
PrintModuleScope("print-module-scope",
cl::desc("When printing IR for print-[before|after]{-all} "
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index a9e04ba760ca..cc7be24c1dbd 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1103,6 +1103,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
updateVCallVisibilityInModule(*RegularLTO.CombinedModule,
Conf.HasWholeProgramVisibility,
DynamicExportSymbols);
+ updatePublicTypeTestCalls(*RegularLTO.CombinedModule,
+ Conf.HasWholeProgramVisibility);
if (Conf.PreOptModuleHook &&
!Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
@@ -1482,6 +1484,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
std::set<GlobalValue::GUID> ExportedGUIDs;
+ if (hasWholeProgramVisibility(Conf.HasWholeProgramVisibility))
+ ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
// If allowed, upgrade public vcall visibility to linkage unit visibility in
// the summaries before whole program devirtualization below.
updateVCallVisibilityInIndex(ThinLTO.CombinedIndex,
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index e248e58e4e4e..2e32469b4926 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
@@ -560,6 +561,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// the module, if applicable.
Mod.setPartialSampleProfileRatio(CombinedIndex);
+ updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility());
+
if (Conf.CodeGenOnly) {
codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 2abf249cbd62..2f7c485b9fc8 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -520,6 +520,8 @@ bool LTOCodeGenerator::optimize() {
// linker option in the old LTO API, but this call allows it to be specified
// via the internal option. Must be done before WPD invoked via the optimizer
// pipeline run below.
+ updatePublicTypeTestCalls(*MergedModule,
+ /* WholeProgramVisibilityEnabledInLTO */ false);
updateVCallVisibilityInModule(*MergedModule,
/* WholeProgramVisibilityEnabledInLTO */ false,
// FIXME: This needs linker information via a
@@ -539,6 +541,16 @@ bool LTOCodeGenerator::optimize() {
// Add an appropriate DataLayout instance for this module...
MergedModule->setDataLayout(TargetMach->createDataLayout());
+ if (!SaveIRBeforeOptPath.empty()) {
+ std::error_code EC;
+ raw_fd_ostream OS(SaveIRBeforeOptPath, EC, sys::fs::OF_None);
+ if (EC)
+ report_fatal_error(Twine("Failed to open ") + SaveIRBeforeOptPath +
+                         " to save the IR before running optimizations\n");
+ WriteBitcodeToFile(*MergedModule, OS,
+ /* ShouldPreserveUseListOrder */ true);
+ }
+
ModuleSummaryIndex CombinedIndex(false);
TargetMach = createTargetMachine();
if (!opt(Config, TargetMach.get(), 0, *MergedModule, /*IsThinLTO=*/false,
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index a1041b3c85f5..2c723bef7d12 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -452,6 +452,10 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
bool DisableCodeGen, StringRef SaveTempsDir,
bool Freestanding, unsigned OptLevel, unsigned count,
bool DebugPassManager) {
+ // See comment at call to updateVCallVisibilityInIndex() for why
+ // WholeProgramVisibilityEnabledInLTO is false.
+ updatePublicTypeTestCalls(TheModule,
+ /* WholeProgramVisibilityEnabledInLTO */ false);
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@@ -1047,6 +1051,8 @@ void ThinLTOCodeGenerator::run() {
// Currently there is no support for enabling whole program visibility via a
// linker option in the old LTO API, but this call allows it to be specified
// via the internal option. Must be done before WPD below.
+ if (hasWholeProgramVisibility(/* WholeProgramVisibilityEnabledInLTO */ false))
+ Index->setWithWholeProgramVisibility();
updateVCallVisibilityInIndex(*Index,
/* WholeProgramVisibilityEnabledInLTO */ false,
// FIXME: This needs linker information via a
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 0b4e9866d50a..f6360c4e2f21 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -293,9 +293,8 @@ public:
: ELFObjectWriter(std::move(MOTW)), OS(OS), DwoOS(DwoOS),
IsLittleEndian(IsLittleEndian) {}
- virtual bool checkRelocation(MCContext &Ctx, SMLoc Loc,
- const MCSectionELF *From,
- const MCSectionELF *To) override {
+ bool checkRelocation(MCContext &Ctx, SMLoc Loc, const MCSectionELF *From,
+ const MCSectionELF *To) override {
if (isDwoSection(*From)) {
Ctx.reportError(Loc, "A dwo section may not contain relocations");
return false;
diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index cf98cb8ff59f..3ee43398ff65 100644
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -20,6 +20,11 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
return None;
}
+uint64_t MCDisassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
+ return 1;
+}
+
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
uint64_t Offset, uint64_t OpSize,
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index d46ae2247535..8a43a477c1c7 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -253,7 +253,7 @@ class XCOFFObjectWriter : public MCObjectWriter {
CsectGroup &getCsectGroup(const MCSectionXCOFF *MCSec);
- virtual void reset() override;
+ void reset() override;
void executePostLayoutBinding(MCAssembler &, const MCAsmLayout &) override;
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 8b44c09023f1..b127e1b43b8e 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -434,41 +434,13 @@ Error SectionWriter::visit(const OwnedDataSection &Sec) {
return Error::success();
}
-static constexpr std::array<uint8_t, 4> ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}};
-
-static bool isDataGnuCompressed(ArrayRef<uint8_t> Data) {
- return Data.size() > ZlibGnuMagic.size() &&
- std::equal(ZlibGnuMagic.begin(), ZlibGnuMagic.end(), Data.data());
-}
-
-template <class ELFT>
-static std::tuple<uint64_t, uint64_t>
-getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
- const bool IsGnuDebug = isDataGnuCompressed(Data);
- const uint64_t DecompressedSize =
- IsGnuDebug
- ? support::endian::read64be(Data.data() + ZlibGnuMagic.size())
- : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())->ch_size;
- const uint64_t DecompressedAlign =
- IsGnuDebug ? 1
- : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())
- ->ch_addralign;
-
- return std::make_tuple(DecompressedSize, DecompressedAlign);
-}
-
template <class ELFT>
Error ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
- const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
- ? (ZlibGnuMagic.size() + sizeof(Sec.Size))
- : sizeof(Elf_Chdr_Impl<ELFT>);
-
- ArrayRef<uint8_t> CompressedContent(Sec.OriginalData.data() + DataOffset,
- Sec.OriginalData.size() - DataOffset);
+ ArrayRef<uint8_t> Compressed =
+ Sec.OriginalData.slice(sizeof(Elf_Chdr_Impl<ELFT>));
SmallVector<uint8_t, 128> DecompressedContent;
- if (Error Err =
- compression::zlib::uncompress(CompressedContent, DecompressedContent,
- static_cast<size_t>(Sec.Size)))
+ if (Error Err = compression::zlib::uncompress(Compressed, DecompressedContent,
+ static_cast<size_t>(Sec.Size)))
return createStringError(errc::invalid_argument,
"'" + Sec.Name + "': " + toString(std::move(Err)));
@@ -518,7 +490,7 @@ Error BinarySectionWriter::visit(const CompressedSection &Sec) {
template <class ELFT>
Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
- Elf_Chdr_Impl<ELFT> Chdr;
+ Elf_Chdr_Impl<ELFT> Chdr = {};
switch (Sec.CompressionType) {
case DebugCompressionType::None:
std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
@@ -1731,15 +1703,11 @@ Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
if (!Name)
return Name.takeError();
- if (Name->startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
- uint64_t DecompressedSize, DecompressedAlign;
- std::tie(DecompressedSize, DecompressedAlign) =
- getDecompressedSizeAndAlignment<ELFT>(*Data);
- return Obj.addSection<CompressedSection>(
- CompressedSection(*Data, DecompressedSize, DecompressedAlign));
- }
-
- return Obj.addSection<Section>(*Data);
+ if (!(Shdr.sh_flags & ELF::SHF_COMPRESSED))
+ return Obj.addSection<Section>(*Data);
+ auto *Chdr = reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data->data());
+ return Obj.addSection<CompressedSection>(
+ CompressedSection(*Data, Chdr->ch_size, Chdr->ch_addralign));
}
}
}
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 799db5034532..2c3ea3a5f6d6 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -115,13 +115,13 @@ public:
Error visit(const OwnedDataSection &Sec) override;
Error visit(const StringTableSection &Sec) override;
Error visit(const DynamicRelocationSection &Sec) override;
- virtual Error visit(const SymbolTableSection &Sec) override = 0;
- virtual Error visit(const RelocationSection &Sec) override = 0;
- virtual Error visit(const GnuDebugLinkSection &Sec) override = 0;
- virtual Error visit(const GroupSection &Sec) override = 0;
- virtual Error visit(const SectionIndexSection &Sec) override = 0;
- virtual Error visit(const CompressedSection &Sec) override = 0;
- virtual Error visit(const DecompressedSection &Sec) override = 0;
+ Error visit(const SymbolTableSection &Sec) override = 0;
+ Error visit(const RelocationSection &Sec) override = 0;
+ Error visit(const GnuDebugLinkSection &Sec) override = 0;
+ Error visit(const GroupSection &Sec) override = 0;
+ Error visit(const SectionIndexSection &Sec) override = 0;
+ Error visit(const CompressedSection &Sec) override = 0;
+ Error visit(const DecompressedSection &Sec) override = 0;
explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {}
};
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index b778006cf66e..9ad2c4135167 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -518,6 +518,14 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AVR_ARCH_XMEGA7, EF_AVR_ARCH_MASK);
BCase(EF_AVR_LINKRELAX_PREPARED);
break;
+ case ELF::EM_LOONGARCH:
+ BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32S, EF_LOONGARCH_BASE_ABI_MASK);
+ BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32F, EF_LOONGARCH_BASE_ABI_MASK);
+ BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32D, EF_LOONGARCH_BASE_ABI_MASK);
+ BCaseMask(EF_LOONGARCH_BASE_ABI_LP64S, EF_LOONGARCH_BASE_ABI_MASK);
+ BCaseMask(EF_LOONGARCH_BASE_ABI_LP64F, EF_LOONGARCH_BASE_ABI_MASK);
+ BCaseMask(EF_LOONGARCH_BASE_ABI_LP64D, EF_LOONGARCH_BASE_ABI_MASK);
+ break;
case ELF::EM_RISCV:
BCase(EF_RISCV_RVC);
BCaseMask(EF_RISCV_FLOAT_ABI_SOFT, EF_RISCV_FLOAT_ABI);
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 3b3eefcc29ca..945ef512391b 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1249,6 +1249,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// flattening of blocks.
OptimizePM.addPass(DivRemPairsPass());
+ // Try to annotate calls that were created during optimization.
+ OptimizePM.addPass(TailCallElimPass());
+
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
OptimizePM.addPass(
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index baea0eb53ef9..a0c63fb33369 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -53,64 +53,6 @@ cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
#endif
);
-// An option that prints out the IR after passes, similar to
-// -print-after-all except that it only prints the IR after passes that
-// change the IR. Those passes that do not make changes to the IR are
-// reported as not making any changes. In addition, the initial IR is
-// also reported. Other hidden options affect the output from this
-// option. -filter-passes will limit the output to the named passes
-// that actually change the IR and other passes are reported as filtered out.
-// The specified passes will either be reported as making no changes (with
-// no IR reported) or the changed IR will be reported. Also, the
-// -filter-print-funcs and -print-module-scope options will do similar
-// filtering based on function name, reporting changed IRs as functions(or
-// modules if -print-module-scope is specified) for a particular function
-// or indicating that the IR has been filtered out. The extra options
-// can be combined, allowing only changed IRs for certain passes on certain
-// functions to be reported in different formats, with the rest being
-// reported as filtered out. The -print-before-changed option will print
-// the IR as it was before each pass that changed it. The optional
-// value of quiet will only report when the IR changes, suppressing
-// all other messages, including the initial IR. The values "diff" and
-// "diff-quiet" will present the changes in a form similar to a patch, in
-// either verbose or quiet mode, respectively. The lines that are removed
-// and added are prefixed with '-' and '+', respectively. The
-// -filter-print-funcs and -filter-passes can be used to filter the output.
-// This reporter relies on the linux diff utility to do comparisons and
-// insert the prefixes. For systems that do not have the necessary
-// facilities, the error message will be shown in place of the expected output.
-//
-enum class ChangePrinter {
- None,
- Verbose,
- Quiet,
- DiffVerbose,
- DiffQuiet,
- ColourDiffVerbose,
- ColourDiffQuiet,
- DotCfgVerbose,
- DotCfgQuiet,
-};
-static cl::opt<ChangePrinter> PrintChanged(
- "print-changed", cl::desc("Print changed IRs"), cl::Hidden,
- cl::ValueOptional, cl::init(ChangePrinter::None),
- cl::values(
- clEnumValN(ChangePrinter::Quiet, "quiet", "Run in quiet mode"),
- clEnumValN(ChangePrinter::DiffVerbose, "diff",
- "Display patch-like changes"),
- clEnumValN(ChangePrinter::DiffQuiet, "diff-quiet",
- "Display patch-like changes in quiet mode"),
- clEnumValN(ChangePrinter::ColourDiffVerbose, "cdiff",
- "Display patch-like changes with color"),
- clEnumValN(ChangePrinter::ColourDiffQuiet, "cdiff-quiet",
- "Display patch-like changes in quiet mode with color"),
- clEnumValN(ChangePrinter::DotCfgVerbose, "dot-cfg",
- "Create a website with graphical changes"),
- clEnumValN(ChangePrinter::DotCfgQuiet, "dot-cfg-quiet",
- "Create a website with graphical changes in quiet mode"),
- // Sentinel value for unspecified option.
- clEnumValN(ChangePrinter::Verbose, "", "")));
-
// An option that supports the -print-changed option. See
// the description for -print-changed for an explanation of the use
// of this option. Note that this option has no effect without -print-changed.
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index adb5d3f0964d..03c0c7aac423 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -85,7 +85,7 @@ Error ARMAttributeParser::CPU_arch(AttrType tag) {
static const char *strings[] = {
"Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6",
"ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
- "ARM v7E-M", "ARM v8", nullptr,
+ "ARM v7E-M", "ARM v8-A", "ARM v8-R",
"ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr,
"ARM v8.1-M Mainline", "ARM v9-A"
};
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e070ce2efa6b..72f0fc94940c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -255,6 +255,12 @@ static bool isZeroingInactiveLanes(SDValue Op) {
return false;
case Intrinsic::aarch64_sve_ptrue:
case Intrinsic::aarch64_sve_pnext:
+ case Intrinsic::aarch64_sve_cmpeq:
+ case Intrinsic::aarch64_sve_cmpne:
+ case Intrinsic::aarch64_sve_cmpge:
+ case Intrinsic::aarch64_sve_cmpgt:
+ case Intrinsic::aarch64_sve_cmphs:
+ case Intrinsic::aarch64_sve_cmphi:
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
@@ -265,6 +271,11 @@ static bool isZeroingInactiveLanes(SDValue Op) {
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide:
+ case Intrinsic::aarch64_sve_fcmpeq:
+ case Intrinsic::aarch64_sve_fcmpne:
+ case Intrinsic::aarch64_sve_fcmpge:
+ case Intrinsic::aarch64_sve_fcmpgt:
+ case Intrinsic::aarch64_sve_fcmpuo:
return true;
}
}
@@ -879,6 +890,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::MSTORE);
+
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine({ISD::SELECT, ISD::VSELECT});
@@ -974,6 +987,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(Op, VT, Custom);
if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
+
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
@@ -1619,6 +1634,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
@@ -11126,6 +11142,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ if (useSVEForFixedLengthVectorVT(VT)) {
+ if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
+ SDLoc DL(Op);
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
+ SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
+ SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps);
+ return convertFromScalableVector(DAG, Op.getValueType(), Seq);
+ }
+
+    // Fall back to common legalisation for all other variants.
+ return SDValue();
+ }
+
// Try to build a simple constant vector.
Op = NormalizeBuildVector(Op, DAG);
if (VT.isInteger()) {
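A worked example of the isConstantSequence() path above (values illustrative): a BUILD_VECTOR of <3, 5, 7, 9> yields (first, second) = (3, 2), and the lowering rebuilds it from a splat plus a scaled step vector.

  // Start = splat(3)                 -> <3, 3, 3, 3>
  // Steps = step_vector scaled by 2  -> <0, 2, 4, 6>
  // Seq   = Start + Steps            -> <3, 5, 7, 9>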
@@ -12772,6 +12802,12 @@ bool AArch64TargetLowering::shouldSinkOperands(
if (isSplatShuffle(II->getOperand(1)))
Ops.push_back(&II->getOperandUse(1));
return !Ops.empty();
+ case Intrinsic::aarch64_sve_ptest_first:
+ case Intrinsic::aarch64_sve_ptest_last:
+ if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
+ if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
+ Ops.push_back(&II->getOperandUse(0));
+ return !Ops.empty();
case Intrinsic::aarch64_sme_write_horiz:
case Intrinsic::aarch64_sme_write_vert:
case Intrinsic::aarch64_sme_writeq_horiz:
@@ -17142,7 +17178,8 @@ static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
N->getOpcode() == AArch64ISD::UUNPKLO) &&
"Unexpected Opcode!");
@@ -17151,6 +17188,42 @@ static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(N->getValueType(0));
+ // If this is a masked load followed by an UUNPKLO, fold this into a masked
+ // extending load. We can do this even if this is already a masked
+ // {z,}extload.
+ if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
+ N->getOpcode() == AArch64ISD::UUNPKLO) {
+ MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
+ SDValue Mask = MLD->getMask();
+ SDLoc DL(N);
+
+ if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
+ SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
+ (MLD->getPassThru()->isUndef() ||
+ isZerosVector(MLD->getPassThru().getNode()))) {
+ unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
+ unsigned PgPattern = Mask->getConstantOperandVal(0);
+ EVT VT = N->getValueType(0);
+
+ // Ensure we can double the size of the predicate pattern
+ unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
+ if (NumElts &&
+ NumElts * VT.getVectorElementType().getSizeInBits() <= MinSVESize) {
+ Mask =
+ getPTrue(DAG, DL, VT.changeVectorElementType(MVT::i1), PgPattern);
+ SDValue PassThru = DAG.getConstant(0, DL, VT);
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
+ PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
+ MLD->getAddressingMode(), ISD::ZEXTLOAD);
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+ }
+ }
+
return SDValue();
}
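To put concrete numbers on the predicate-doubling guard above (configuration hypothetical): with a minimum SVE width of 512 bits, an unpack result of nxv4i32, and a ptrue(VL8) mask, the check is 8 * 32 = 256 <= 512, so the mask can be re-expressed at the doubled element width and the UUNPKLO folds into a zero-extending masked load.

  // MinSVESize = 512, VT = nxv4i32, PgPattern = VL8
  // NumElts * EltBits = 8 * 32 = 256 <= 512  -> fold is legal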
@@ -17484,6 +17557,50 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
+static SDValue performMSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Value = MST->getValue();
+ SDValue Mask = MST->getMask();
+ SDLoc DL(N);
+
+ // If this is a UZP1 followed by a masked store, fold this into a masked
+ // truncating store. We can do this even if this is already a masked
+ // truncstore.
+ if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
+ MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
+ Value.getValueType().isInteger()) {
+ Value = Value.getOperand(0);
+ if (Value.getOpcode() == ISD::BITCAST) {
+ EVT HalfVT =
+ Value.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+ EVT InVT = Value.getOperand(0).getValueType();
+
+ if (HalfVT.widenIntegerVectorElementType(*DAG.getContext()) == InVT) {
+ unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
+ unsigned PgPattern = Mask->getConstantOperandVal(0);
+
+ // Ensure we can double the size of the predicate pattern
+ unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
+ if (NumElts && NumElts * InVT.getVectorElementType().getSizeInBits() <=
+ MinSVESize) {
+ Mask = getPTrue(DAG, DL, InVT.changeVectorElementType(MVT::i1),
+ PgPattern);
+ return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
+ MST->getBasePtr(), MST->getOffset(), Mask,
+ MST->getMemoryVT(), MST->getMemOperand(),
+ MST->getAddressingMode(),
+ /*IsTruncating=*/true);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
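Intuition for the pattern matched above (types schematic, little-endian): a vector truncation such as nxv4i32 -> nxv4i16 is typically expressed as a bitcast to nxv8i16 followed by uzp1 keeping the low i16 of each i32 lane, so storing the uzp1 result is equivalent to a truncating masked store of the original wide value.

  //   V = uzp1(bitcast<nxv8i16>(X), ...)   ; low i16 of each i32 lane of X
  //   masked_store(V, Pg)  ==>  masked_truncstore(X, Pg', i16 memory)
  //   where Pg' is the same ptrue pattern at the i32 element width.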
/// \return true if part of the index was folded into the Base.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
SDLoc DL, SelectionDAG &DAG) {
@@ -18191,7 +18308,9 @@ static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
Op0ExtV, Op1ExtV, Op->getOperand(2));
}
-static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -18234,6 +18353,21 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+ // setcc (iN (bitcast (vNi1 X))), 0, (eq|ne)
+ // ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne)
+ if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
+ LHS->getOpcode() == ISD::BITCAST) {
+ EVT ToVT = LHS->getValueType(0);
+ EVT FromVT = LHS->getOperand(0).getValueType();
+ if (FromVT.isFixedLengthVector() &&
+ FromVT.getVectorElementType() == MVT::i1) {
+ LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
+ LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS);
+ return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
+ }
+ }
+
return SDValue();
}
@@ -19376,13 +19510,15 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::SETCC:
- return performSETCCCombine(N, DAG);
+ return performSETCCCombine(N, DCI, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
+ case ISD::MSTORE:
+ return performMSTORECombine(N, DCI, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER:
return performMaskedGatherScatterCombine(N, DCI, DAG);
@@ -19407,7 +19543,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSpliceCombine(N, DAG);
case AArch64ISD::UUNPKLO:
case AArch64ISD::UUNPKHI:
- return performUnpackCombine(N, DAG);
+ return performUnpackCombine(N, DAG, Subtarget);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7b7e5270888..926e7305bab9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4052,6 +4052,12 @@ def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
+// Pattern for FP16 immediates
+let Predicates = [HasFullFP16] in {
+ def : Pat<(f16 fpimm:$in),
+ (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
+}
+
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4032c4667bc7..9b040860cc3c 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -287,6 +287,8 @@ def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWith
def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(AArch64fadda_p_node node:$op1, node:$op2, node:$op3),
(AArch64fadda_p_node (SVEAllActive), node:$op2,
+ (vselect node:$op1, node:$op3, (splat_vector (f16 fpimm_minus0)))),
+ (AArch64fadda_p_node (SVEAllActive), node:$op2,
(vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))),
(AArch64fadda_p_node (SVEAllActive), node:$op2,
(vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>;
@@ -337,6 +339,22 @@ def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
(sub node:$op2, node:$op1)>;
+def AArch64add_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2),
+ [(int_aarch64_sve_add node:$pred, node:$op1, node:$op2),
+ (add node:$op1, (vselect node:$pred, node:$op2, (SVEDup0)))]>;
+def AArch64sub_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2),
+ [(int_aarch64_sve_sub node:$pred, node:$op1, node:$op2),
+ (sub node:$op1, (vselect node:$pred, node:$op2, (SVEDup0)))]>;
+def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3),
+ (add node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3)),
+ // add(a, select(mask, mul(b, c), splat(0))) -> mla(a, mask, b, c)
+ (add node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
+def AArch64mls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_mls node:$pred, node:$op1, node:$op2, node:$op3),
+ (sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3)),
+ // sub(a, select(mask, mul(b, c), splat(0))) -> mls(a, mask, b, c)
+ (sub node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -359,8 +377,8 @@ let Predicates = [HasSVEorSME] in {
defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", AArch64bic>;
- defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
- defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
+ defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", AArch64add_m1, DestructiveBinaryComm>;
+ defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", AArch64sub_m1, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", int_aarch64_sve_orr, DestructiveBinaryComm>;
@@ -391,8 +409,8 @@ let Predicates = [HasSVEorSME] in {
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
- defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>;
- defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>;
+ defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1>;
+ defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", AArch64mls_m1>;
// SVE predicated integer reductions.
defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
@@ -712,6 +730,12 @@ let Predicates = [HasSVEorSME] in {
(DUP_ZI_D $a, $b)>;
// Duplicate immediate FP into all vector elements.
+ def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))),
+ (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;
+ def : Pat<(nxv4f16 (splat_vector (f16 fpimm:$val))),
+ (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;
+ def : Pat<(nxv8f16 (splat_vector (f16 fpimm:$val))),
+ (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;
def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))),
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))),
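The AArch64add_m1/AArch64mla_m1 family of fragments added above recognizes the select-of-zero form that masked arithmetic takes after vectorization. A sketch of source code whose IR comes out as add(a, select(pred, mul(b, c), splat(0))) — assuming the loop auto-vectorizes for SVE; the register names in the comment are illustrative:

    void madd(int *acc, const int *b, const int *c, const bool *m, long n) {
      for (long i = 0; i < n; ++i)
        if (m[i])
          acc[i] += b[i] * c[i];
      // With AArch64mla_m1 this select-of-zero shape now matches a single
      // predicated multiply-add, e.g. mla z0.s, p0/m, z1.s, z2.s, instead
      // of not being folded into the MLA form at all.
    }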
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 1b65589416c3..2f20232e452d 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -350,6 +350,14 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
+uint64_t AArch64Disassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
+ // AArch64 instructions are always 4 bytes wide, so there's no point
+ // in skipping any smaller number of bytes if an instruction can't
+ // be decoded.
+ return 4;
+}
+
static MCSymbolizer *
createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
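For context, suggestBytesToSkip is the hook disassembler clients use to resynchronize after a decode failure; the AArch64 override above pins the step to the fixed 4-byte instruction width. A minimal consumer sketch (assuming an already-constructed MCDisassembler and a raw byte buffer; this is not the actual llvm-objdump loop):

    #include "llvm/MC/MCDisassembler/MCDisassembler.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void disassembleAll(const MCDisassembler &Disasm, ArrayRef<uint8_t> Bytes,
                        uint64_t BaseAddr) {
      uint64_t Offset = 0;
      while (Offset < Bytes.size()) {
        MCInst Inst;
        uint64_t Size = 0;
        if (Disasm.getInstruction(Inst, Size, Bytes.slice(Offset),
                                  BaseAddr + Offset, nulls()) ==
            MCDisassembler::Fail) {
          // On AArch64 this now always advances by a full 4-byte slot.
          Offset += Disasm.suggestBytesToSkip(Bytes.slice(Offset),
                                              BaseAddr + Offset);
          continue;
        }
        // ... print or otherwise consume Inst ...
        Offset += Size;
      }
    }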
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 6761d449a7f4..b9f78546b89b 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -30,6 +30,9 @@ public:
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &CStream) const override;
+
+ uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 04bc91318da8..d655caa80ba8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -66,8 +66,8 @@ public:
report_fatal_error("Invalid rule identifier");
}
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
};
bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
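The hunks of this shape in the four GISel combiner files are pure cleanups: `override` already implies the function is virtual, so the leading `virtual` was redundant (this is the pattern clang-tidy's modernize-use-override flags). Reduced illustration, not from the patch:

    struct CombinerInfoBase {
      virtual bool combine() const = 0;
    };
    struct MyCombinerInfo : CombinerInfoBase {
      // 'override' alone both documents and enforces that this overrides a
      // virtual base method; repeating 'virtual' adds nothing.
      bool combine() const override;
    };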
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index ba206bac68d1..dfb531cda7e9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -355,8 +355,8 @@ public:
report_fatal_error("Invalid rule identifier");
}
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
};
bool AArch64PostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index d7959a82c484..eab1de94e9c8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -997,8 +997,8 @@ public:
report_fatal_error("Invalid rule identifier");
}
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
};
bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 275949c5ee64..50bae68b4d33 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -370,8 +370,8 @@ public:
report_fatal_error("Invalid rule identifier");
}
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
};
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7cdd4c4af95e..36daecf634d7 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2958,8 +2958,7 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
- SDPatternOperator outerop, SDPatternOperator mulop> {
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
@@ -2969,15 +2968,6 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)),
- (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>;
- def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)),
- (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>;
- def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)),
- (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>;
- def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)),
- (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index b4a8766d682e..56a9a30bc59a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -29,6 +29,8 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Constants.h"
@@ -43,6 +45,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <tuple>
#include <vector>
#define DEBUG_TYPE "amdgpu-lower-module-lds"
@@ -97,6 +100,9 @@ class AMDGPULowerModuleLDS : public ModulePass {
static void
removeFromUsedLists(Module &M,
const std::vector<GlobalVariable *> &LocalVars) {
+ // The verifier rejects used lists containing an inttoptr of a constant,
+ // so remove the variables from these lists before calling replaceAllUsesWith.
+
SmallPtrSet<Constant *, 32> LocalVarsSet;
for (GlobalVariable *LocalVar : LocalVars)
if (Constant *C = dyn_cast<Constant>(LocalVar->stripPointerCasts()))
@@ -146,12 +152,59 @@ public:
}
bool runOnModule(Module &M) override {
+ LLVMContext &Ctx = M.getContext();
CallGraph CG = CallGraph(M);
bool Changed = superAlignLDSGlobals(M);
+
+ // Move variables used by functions into amdgcn.module.lds
std::vector<GlobalVariable *> ModuleScopeVariables =
AMDGPU::findVariablesToLower(M, nullptr);
- Changed |= processUsedLDS(CG, M, ModuleScopeVariables);
+ if (!ModuleScopeVariables.empty()) {
+ std::string VarName = "llvm.amdgcn.module.lds";
+
+ GlobalVariable *SGV;
+ DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
+ std::tie(SGV, LDSVarsToConstantGEP) =
+ createLDSVariableReplacement(M, VarName, ModuleScopeVariables);
+
+ appendToCompilerUsed(
+ M, {static_cast<GlobalValue *>(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
+
+ removeFromUsedLists(M, ModuleScopeVariables);
+ replaceLDSVariablesWithStruct(M, ModuleScopeVariables, SGV,
+ LDSVarsToConstantGEP,
+ [](Use &) { return true; });
+
+ // This ensures the variable is allocated when called functions access it.
+ // It also lets other passes, specifically PromoteAlloca, accurately
+ // calculate how much LDS will be used by the kernel after lowering.
+
+ IRBuilder<> Builder(Ctx);
+ for (Function &Func : M.functions()) {
+ if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
+ const CallGraphNode *N = CG[&Func];
+ const bool CalleesRequireModuleLDS = N->size() > 0;
+
+ if (CalleesRequireModuleLDS) {
+ // If a function this kernel might call requires module LDS,
+ // annotate the kernel to let later passes know it will allocate
+ // this structure, even if not apparent from the IR.
+ markUsedByKernel(Builder, &Func, SGV);
+ } else {
+ // However if we are certain this kernel cannot call a function that
+ // requires module LDS, annotate the kernel so the backend can elide
+ // the allocation without repeating callgraph walks.
+ Func.addFnAttr("amdgpu-elide-module-lds");
+ }
+ }
+ }
+
+ Changed = true;
+ }
+ // Move variables used by kernels into per-kernel instances
for (Function &F : M.functions()) {
if (F.isDeclaration())
continue;
@@ -159,9 +212,37 @@ public:
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
+
std::vector<GlobalVariable *> KernelUsedVariables =
AMDGPU::findVariablesToLower(M, &F);
- Changed |= processUsedLDS(CG, M, KernelUsedVariables, &F);
+
+ // Replace all constant uses with instructions if they belong to the
+ // current kernel. This is not strictly necessary, but removing it would
+ // cause test churn.
+ for (size_t I = 0; I < KernelUsedVariables.size(); I++) {
+ GlobalVariable *GV = KernelUsedVariables[I];
+ for (User *U : make_early_inc_range(GV->users())) {
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
+ AMDGPU::replaceConstantUsesInFunction(C, &F);
+ }
+ GV->removeDeadConstantUsers();
+ }
+
+ if (!KernelUsedVariables.empty()) {
+ std::string VarName =
+ (Twine("llvm.amdgcn.kernel.") + F.getName() + ".lds").str();
+ GlobalVariable *SGV;
+ DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
+ std::tie(SGV, LDSVarsToConstantGEP) =
+ createLDSVariableReplacement(M, VarName, KernelUsedVariables);
+
+ removeFromUsedLists(M, KernelUsedVariables);
+ replaceLDSVariablesWithStruct(
+ M, KernelUsedVariables, SGV, LDSVarsToConstantGEP, [&F](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ return I && I->getFunction() == &F;
+ });
+ Changed = true;
+ }
}
return Changed;
@@ -212,16 +293,18 @@ private:
return Changed;
}
- bool processUsedLDS(CallGraph const &CG, Module &M,
- std::vector<GlobalVariable *> const &LDSVarsToTransform,
- Function *F = nullptr) {
+ std::tuple<GlobalVariable *, DenseMap<GlobalVariable *, Constant *>>
+ createLDSVariableReplacement(
+ Module &M, std::string VarName,
+ std::vector<GlobalVariable *> const &LDSVarsToTransform) {
+ // Create a struct instance containing LDSVarsToTransform and a map from
+ // each of those variables to the corresponding constantexpr GEP into the
+ // struct. Padding variables may be introduced to meet alignment
+ // requirements; no aliasing metadata is useful for them as they have no
+ // uses, and they are erased before returning.
+
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
-
- if (LDSVarsToTransform.empty()) {
- // No variables to rewrite, no changes made.
- return false;
- }
+ assert(!LDSVarsToTransform.empty());
SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
LayoutFields.reserve(LDSVarsToTransform.size());
@@ -234,9 +317,10 @@ private:
performOptimizedStructLayout(LayoutFields);
std::vector<GlobalVariable *> LocalVars;
+ BitVector IsPaddingField;
LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large
+ IsPaddingField.reserve(LDSVarsToTransform.size());
{
- // This usually won't need to insert any padding, perhaps avoid the alloc
uint64_t CurrentOffset = 0;
for (size_t I = 0; I < LayoutFields.size(); I++) {
GlobalVariable *FGV = static_cast<GlobalVariable *>(
@@ -256,10 +340,12 @@ private:
M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
"", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
false));
+ IsPaddingField.push_back(true);
CurrentOffset += Padding;
}
LocalVars.push_back(FGV);
+ IsPaddingField.push_back(false);
CurrentOffset += LayoutFields[I].Size;
}
}
@@ -270,9 +356,6 @@ private:
LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
[](const GlobalVariable *V) -> Type * { return V->getValueType(); });
- std::string VarName(
- F ? (Twine("llvm.amdgcn.kernel.") + F->getName() + ".lds").str()
- : "llvm.amdgcn.module.lds");
StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");
Align StructAlign =
@@ -283,62 +366,65 @@ private:
VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
false);
SGV->setAlignment(StructAlign);
- if (!F) {
- appendToCompilerUsed(
- M, {static_cast<GlobalValue *>(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
+
+ DenseMap<GlobalVariable *, Constant *> Map;
+ Type *I32 = Type::getInt32Ty(Ctx);
+ for (size_t I = 0; I < LocalVars.size(); I++) {
+ GlobalVariable *GV = LocalVars[I];
+ Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
+ Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx, true);
+ if (IsPaddingField[I]) {
+ assert(GV->use_empty());
+ GV->eraseFromParent();
+ } else {
+ Map[GV] = GEP;
+ }
}
+ assert(Map.size() == LDSVarsToTransform.size());
+ return {SGV, std::move(Map)};
+ }
- // The verifier rejects used lists containing an inttoptr of a constant
- // so remove the variables from these lists before replaceAllUsesWith
- removeFromUsedLists(M, LocalVars);
+ template <typename PredicateTy>
+ void replaceLDSVariablesWithStruct(
+ Module &M, std::vector<GlobalVariable *> const &LDSVarsToTransform,
+ GlobalVariable *SGV,
+ DenseMap<GlobalVariable *, Constant *> &LDSVarsToConstantGEP,
+ PredicateTy Predicate) {
+ LLVMContext &Ctx = M.getContext();
+ const DataLayout &DL = M.getDataLayout();
// Create alias.scope and their lists. Each field in the new structure
// does not alias with all other fields.
SmallVector<MDNode *> AliasScopes;
SmallVector<Metadata *> NoAliasList;
- if (LocalVars.size() > 1) {
+ const size_t NumberVars = LDSVarsToTransform.size();
+ if (NumberVars > 1) {
MDBuilder MDB(Ctx);
- AliasScopes.reserve(LocalVars.size());
+ AliasScopes.reserve(NumberVars);
MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
- for (size_t I = 0; I < LocalVars.size(); I++) {
+ for (size_t I = 0; I < NumberVars; I++) {
MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
AliasScopes.push_back(Scope);
}
NoAliasList.append(&AliasScopes[1], AliasScopes.end());
}
- // Replace uses of ith variable with a constantexpr to the ith field of the
- // instance that will be allocated by AMDGPUMachineFunction
- Type *I32 = Type::getInt32Ty(Ctx);
- for (size_t I = 0; I < LocalVars.size(); I++) {
- GlobalVariable *GV = LocalVars[I];
- Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
- Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx);
- if (F) {
- // Replace all constant uses with instructions if they belong to the
- // current kernel.
- for (User *U : make_early_inc_range(GV->users())) {
- if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
- AMDGPU::replaceConstantUsesInFunction(C, F);
- }
-
- GV->removeDeadConstantUsers();
+ // Replace uses of ith variable with a constantexpr to the corresponding
+ // field of the instance that will be allocated by AMDGPUMachineFunction
+ for (size_t I = 0; I < NumberVars; I++) {
+ GlobalVariable *GV = LDSVarsToTransform[I];
+ Constant *GEP = LDSVarsToConstantGEP[GV];
- GV->replaceUsesWithIf(GEP, [F](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- return I && I->getFunction() == F;
- });
- } else {
- GV->replaceAllUsesWith(GEP);
- }
+ GV->replaceUsesWithIf(GEP, Predicate);
if (GV->use_empty()) {
GV->eraseFromParent();
}
- uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
- Align A = commonAlignment(StructAlign, Off);
+ APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);
+ uint64_t Offset = APOff.getZExtValue();
+
+ Align A = commonAlignment(SGV->getAlign().valueOrOne(), Offset);
if (I)
NoAliasList[I - 1] = AliasScopes[I - 1];
@@ -349,32 +435,6 @@ private:
refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
}
-
- // This ensures the variable is allocated when called functions access it.
- // It also lets other passes, specifically PromoteAlloca, accurately
- // calculate how much LDS will be used by the kernel after lowering.
- if (!F) {
- IRBuilder<> Builder(Ctx);
- for (Function &Func : M.functions()) {
- if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
- const CallGraphNode *N = CG[&Func];
- const bool CalleesRequireModuleLDS = N->size() > 0;
-
- if (CalleesRequireModuleLDS) {
- // If a function this kernel might call requires module LDS,
- // annotate the kernel to let later passes know it will allocate
- // this structure, even if not apparent from the IR.
- markUsedByKernel(Builder, &Func, SGV);
- } else {
- // However if we are certain this kernel cannot call a function that
- // requires module LDS, annotate the kernel so the backend can elide
- // the allocation without repeating callgraph walks.
- Func.addFnAttr("amdgpu-elide-module-lds");
- }
- }
- }
- }
- return true;
}
void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
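One detail of the split worth calling out: replaceLDSVariablesWithStruct no longer has the struct type or StructAlign in scope, so the per-field alignment that used to come from DL.getStructLayout(LDSTy)->getElementOffset(I) is now recovered from the constant GEP itself, with the base alignment taken from SGV->getAlign(). The same logic restated as a free function (a sketch; fieldAlign is not a name from the patch):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    static Align fieldAlign(const DataLayout &DL, Constant *GEP,
                            Align StructAlign) {
      // Accumulate the constant byte offset encoded in the GEP; the field's
      // alignment is whatever the struct's alignment guarantees at that
      // offset.
      APInt Off(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEP->stripAndAccumulateInBoundsConstantOffsets(DL, Off);
      return commonAlignment(StructAlign, Off.getZExtValue());
    }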
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
index 753f7edc9385..98b5031071cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -29,7 +29,7 @@ public:
virtual ~AMDGPUMIRFormatter() = default;
/// Implement target specific parsing of target custom pseudo source value.
- virtual bool
+ bool
parseCustomPseudoSourceValue(StringRef Src, MachineFunction &MF,
PerFunctionMIParsingState &PFS,
const PseudoSourceValue *&PSV,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index bfe2e9b66ed4..98e9907068f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -191,8 +191,8 @@ public:
report_fatal_error("Invalid rule identifier");
}
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
};
bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 04da14cc4916..859deae86f35 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -9,6 +9,18 @@
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
+///
+/// This pass will apply multiple scheduling stages to the same function.
+/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
+/// entry point for the scheduling of those regions is
+/// GCNScheduleDAGMILive::runSchedStages.
+///
+/// Generally, the reason for having multiple scheduling stages is to account
+/// for the kernel-wide effect of register usage on occupancy. Usually, only a
+/// few scheduling regions will have register pressure high enough to limit
+/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
+/// other regions.
+///
//===----------------------------------------------------------------------===//
#include "GCNSchedStrategy.h"
@@ -20,9 +32,9 @@
using namespace llvm;
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
- const MachineSchedContext *C) :
- GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
- HasExcessPressure(false), MF(nullptr) { }
+ const MachineSchedContext *C)
+ : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+ HasClusteredNodes(false), HasExcessPressure(false) {}
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
@@ -302,210 +314,30 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
return SU;
}
-GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
- std::unique_ptr<MachineSchedStrategy> S) :
- ScheduleDAGMILive(C, std::move(S)),
- ST(MF.getSubtarget<GCNSubtarget>()),
- MFI(*MF.getInfo<SIMachineFunctionInfo>()),
- StartingOccupancy(MFI.getOccupancy()),
- MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
+GCNScheduleDAGMILive::GCNScheduleDAGMILive(
+ MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
+ : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
+ MFI(*MF.getInfo<SIMachineFunctionInfo>()),
+ StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}
void GCNScheduleDAGMILive::schedule() {
- if (Stage == Collect) {
- // Just record regions at the first pass.
- Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
- return;
- }
-
- std::vector<MachineInstr*> Unsched;
- Unsched.reserve(NumRegionInstrs);
- for (auto &I : *this) {
- Unsched.push_back(&I);
- }
-
- GCNRegPressure PressureBefore;
- if (LIS) {
- PressureBefore = Pressure[RegionIdx];
-
- LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
- GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
- dbgs() << "Region live-in pressure: ";
- llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
- dbgs() << "Region register pressure: ";
- PressureBefore.print(dbgs()));
- }
-
- GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
- // Set HasClusteredNodes to true for late stages where we have already
- // collected it. That way pickNode() will not scan SDep's when not needed.
- S.HasClusteredNodes = Stage > InitialSchedule;
- S.HasExcessPressure = false;
- ScheduleDAGMILive::schedule();
- Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
- RescheduleRegions[RegionIdx] = false;
- if (Stage == InitialSchedule && S.HasClusteredNodes)
- RegionsWithClusters[RegionIdx] = true;
- if (S.HasExcessPressure)
- RegionsWithHighRP[RegionIdx] = true;
-
- if (!LIS)
- return;
-
- // Check the results of scheduling.
- auto PressureAfter = getRealRegPressure();
-
- LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
- PressureAfter.print(dbgs()));
-
- if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
- PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
- Pressure[RegionIdx] = PressureAfter;
- RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST) == MinOccupancy;
-
- LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
- return;
- }
-
- unsigned WavesAfter =
- std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
- unsigned WavesBefore =
- std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
- LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
- << ", after " << WavesAfter << ".\n");
-
- // We may not be able to keep the current target occupancy because of the just
- // scheduled region. We might still be able to revert scheduling if the
- // occupancy before was higher, or if the current schedule has register
- // pressure higher than the excess limits which could lead to more spilling.
- unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
-
- // Allow memory bound functions to drop to 4 waves if not limited by an
- // attribute.
- if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
- WavesAfter >= MFI.getMinAllowedOccupancy()) {
- LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
- << MFI.getMinAllowedOccupancy() << " waves\n");
- NewOccupancy = WavesAfter;
- }
-
- if (NewOccupancy < MinOccupancy) {
- MinOccupancy = NewOccupancy;
- MFI.limitOccupancy(MinOccupancy);
- RegionsWithMinOcc.reset();
- LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
- << MinOccupancy << ".\n");
- }
-
- unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
- unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
- PressureAfter.getAGPRNum() > MaxVGPRs ||
- PressureAfter.getSGPRNum() > MaxSGPRs) {
- RescheduleRegions[RegionIdx] = true;
- RegionsWithHighRP[RegionIdx] = true;
- }
-
- // If this condition is true, then either the occupancy before and after
- // scheduling is the same, or we are allowing the occupancy to drop because
- // the function is memory bound. Even if we are OK with the current occupancy,
- // we still need to verify that we will not introduce any extra chance of
- // spilling.
- if (WavesAfter >= MinOccupancy) {
- if (Stage == UnclusteredReschedule &&
- !PressureAfter.less(ST, PressureBefore)) {
- LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
- } else if (WavesAfter > MFI.getMinWavesPerEU() ||
- PressureAfter.less(ST, PressureBefore) ||
- !RescheduleRegions[RegionIdx]) {
- Pressure[RegionIdx] = PressureAfter;
- RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST) == MinOccupancy;
- if (!RegionsWithClusters[RegionIdx] &&
- (Stage + 1) == UnclusteredReschedule)
- RescheduleRegions[RegionIdx] = false;
- return;
- } else {
- LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
- }
- }
-
- RegionsWithMinOcc[RegionIdx] =
- PressureBefore.getOccupancy(ST) == MinOccupancy;
- LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
- RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
- (Stage + 1) != UnclusteredReschedule;
- RegionEnd = RegionBegin;
- int SkippedDebugInstr = 0;
- for (MachineInstr *MI : Unsched) {
- if (MI->isDebugInstr()) {
- ++SkippedDebugInstr;
- continue;
- }
-
- if (MI->getIterator() != RegionEnd) {
- BB->remove(MI);
- BB->insert(RegionEnd, MI);
- if (!MI->isDebugInstr())
- LIS->handleMove(*MI, true);
- }
- // Reset read-undef flags and update them later.
- for (auto &Op : MI->operands())
- if (Op.isReg() && Op.isDef())
- Op.setIsUndef(false);
- RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
- if (!MI->isDebugInstr()) {
- if (ShouldTrackLaneMasks) {
- // Adjust liveness and add missing dead+read-undef flags.
- SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
- RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
- } else {
- // Adjust for missing dead-def flags.
- RegOpers.detectDeadDefs(*MI, *LIS);
- }
- }
- RegionEnd = MI->getIterator();
- ++RegionEnd;
- LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
- }
-
- // After reverting schedule, debug instrs will now be at the end of the block
- // and RegionEnd will point to the first debug instr. Increment RegionEnd
- // pass debug instrs to the actual end of the scheduling region.
- while (SkippedDebugInstr-- > 0)
- ++RegionEnd;
-
- // If Unsched.front() instruction is a debug instruction, this will actually
- // shrink the region since we moved all debug instructions to the end of the
- // block. Find the first instruction that is not a debug instruction.
- RegionBegin = Unsched.front()->getIterator();
- if (RegionBegin->isDebugInstr()) {
- for (MachineInstr *MI : Unsched) {
- if (MI->isDebugInstr())
- continue;
- RegionBegin = MI->getIterator();
- break;
- }
- }
-
- // Then move the debug instructions back into their correct place and set
- // RegionBegin and RegionEnd if needed.
- placeDebugValues();
-
- Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
+ // Collect all scheduling regions. The actual scheduling is performed in
+ // GCNScheduleDAGMILive::finalizeSchedule.
+ Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
}
-GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
+GCNRegPressure
+GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
GCNDownwardRPTracker RPTracker(*LIS);
RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
return RPTracker.moveMaxPressure();
}
-void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
+void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
+ const MachineBasicBlock *MBB) {
GCNDownwardRPTracker RPTracker(*LIS);
// If the block has the only successor then live-ins of that successor are
@@ -542,7 +374,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
RPTracker.reset(*I, &LRS);
}
- for ( ; ; ) {
+ for (;;) {
I = RPTracker.getNext();
if (Regions[CurRegion].first == I || NonDbgMI == I) {
@@ -588,8 +420,9 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
}
void GCNScheduleDAGMILive::finalizeSchedule() {
- LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
-
+ // Start actual scheduling here. This function is called by the base
+ // MachineScheduler after all regions have been recorded by
+ // GCNScheduleDAGMILive::schedule().
LiveIns.resize(Regions.size());
Pressure.resize(Regions.size());
RescheduleRegions.resize(Regions.size());
@@ -601,142 +434,470 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
RegionsWithHighRP.reset();
RegionsWithMinOcc.reset();
+ runSchedStages();
+}
+
+void GCNScheduleDAGMILive::runSchedStages() {
+ LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
+ InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
+ UnclusteredRescheduleStage S1(GCNSchedStageID::UnclusteredReschedule, *this);
+ ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
+ *this);
+ PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
+ GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};
+
if (!Regions.empty())
BBLiveInMap = getBBLiveInMap();
- std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
+ for (auto *Stage : SchedStages) {
+ if (!Stage->initGCNSchedStage())
+ continue;
- do {
- Stage++;
- RegionIdx = 0;
- MachineBasicBlock *MBB = nullptr;
+ for (auto Region : Regions) {
+ RegionBegin = Region.first;
+ RegionEnd = Region.second;
+ // Set up scheduling for the region and check whether it should be skipped.
+ if (!Stage->initGCNRegion()) {
+ Stage->advanceRegion();
+ exitRegion();
+ continue;
+ }
- if (Stage > InitialSchedule) {
- if (!LIS)
- break;
+ ScheduleDAGMILive::schedule();
+ Stage->finalizeGCNRegion();
+ }
- // Retry function scheduling if we found resulting occupancy and it is
- // lower than used for first pass scheduling. This will give more freedom
- // to schedule low register pressure blocks.
- // Code is partially copied from MachineSchedulerBase::scheduleRegions().
+ Stage->finalizeGCNSchedStage();
+ }
+}
- if (Stage == UnclusteredReschedule) {
- if (RescheduleRegions.none())
- continue;
- LLVM_DEBUG(dbgs() <<
- "Retrying function scheduling without clustering.\n");
- }
+#ifndef NDEBUG
+raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
+ switch (StageID) {
+ case GCNSchedStageID::InitialSchedule:
+ OS << "Initial Schedule";
+ break;
+ case GCNSchedStageID::UnclusteredReschedule:
+ OS << "Unclustered Reschedule";
+ break;
+ case GCNSchedStageID::ClusteredLowOccupancyReschedule:
+ OS << "Clustered Low Occupancy Reschedule";
+ break;
+ case GCNSchedStageID::PreRARematerialize:
+ OS << "Pre-RA Rematerialize";
+ break;
+ }
+ return OS;
+}
+#endif
- if (Stage == ClusteredLowOccupancyReschedule) {
- if (StartingOccupancy <= MinOccupancy)
- break;
+GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+ : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
+ MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
- LLVM_DEBUG(
- dbgs()
- << "Retrying function scheduling with lowest recorded occupancy "
- << MinOccupancy << ".\n");
- }
+bool GCNSchedStage::initGCNSchedStage() {
+ if (!DAG.LIS)
+ return false;
- if (Stage == PreRARematerialize) {
- if (RegionsWithMinOcc.none() || Regions.size() == 1)
- break;
+ LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
+ return true;
+}
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- // Check maximum occupancy
- if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
- MinOccupancy)
- break;
+bool UnclusteredRescheduleStage::initGCNSchedStage() {
+ if (!GCNSchedStage::initGCNSchedStage())
+ return false;
- // FIXME: This pass will invalidate cached MBBLiveIns for regions
- // inbetween the defs and region we sinked the def to. Cached pressure
- // for regions where a def is sinked from will also be invalidated. Will
- // need to be fixed if there is another pass after this pass.
- static_assert(LastStage == PreRARematerialize,
- "Passes after PreRARematerialize are not supported");
+ if (DAG.RescheduleRegions.none())
+ return false;
- collectRematerializableInstructions();
- if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
- break;
+ SavedMutations.swap(DAG.Mutations);
- LLVM_DEBUG(
- dbgs() << "Retrying function scheduling with improved occupancy of "
- << MinOccupancy << " from rematerializing\n");
- }
- }
+ LLVM_DEBUG(dbgs() << "Retrying function scheduling without clustering.\n");
+ return true;
+}
- if (Stage == UnclusteredReschedule)
- SavedMutations.swap(Mutations);
+bool ClusteredLowOccStage::initGCNSchedStage() {
+ if (!GCNSchedStage::initGCNSchedStage())
+ return false;
- for (auto Region : Regions) {
- if (((Stage == UnclusteredReschedule || Stage == PreRARematerialize) &&
- !RescheduleRegions[RegionIdx]) ||
- (Stage == ClusteredLowOccupancyReschedule &&
- !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
+ // Don't bother trying to improve ILP in lower RP regions if occupancy has not
+ // been dropped. All regions will have already been scheduled with the ideal
+ // occupancy targets.
+ if (DAG.StartingOccupancy <= DAG.MinOccupancy)
+ return false;
- ++RegionIdx;
- continue;
- }
+ LLVM_DEBUG(
+ dbgs() << "Retrying function scheduling with lowest recorded occupancy "
+ << DAG.MinOccupancy << ".\n");
+ return true;
+}
- RegionBegin = Region.first;
- RegionEnd = Region.second;
+bool PreRARematStage::initGCNSchedStage() {
+ if (!GCNSchedStage::initGCNSchedStage())
+ return false;
- if (RegionBegin->getParent() != MBB) {
- if (MBB) finishBlock();
- MBB = RegionBegin->getParent();
- startBlock(MBB);
- if (Stage == InitialSchedule)
- computeBlockPressure(MBB);
- }
+ if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
+ return false;
- unsigned NumRegionInstrs = std::distance(begin(), end());
- enterRegion(MBB, begin(), end(), NumRegionInstrs);
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ // Check maximum occupancy
+ if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
+ DAG.MinOccupancy)
+ return false;
- // Skip empty scheduling regions (0 or 1 schedulable instructions).
- if (begin() == end() || begin() == std::prev(end())) {
- exitRegion();
- ++RegionIdx;
- continue;
- }
+ // FIXME: This pass will invalidate cached MBBLiveIns for regions
+ // in between the defs and the region we sank the def to. Cached pressure
+ // for regions where a def is sunk from will also be invalidated. Will
+ // need to be fixed if there is another pass after this pass.
- LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
- LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
- << MBB->getName() << "\n From: " << *begin()
- << " To: ";
- if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
- dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+ collectRematerializableInstructions();
+ if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
+ return false;
- schedule();
+ LLVM_DEBUG(
+ dbgs() << "Retrying function scheduling with improved occupancy of "
+ << DAG.MinOccupancy << " from rematerializing\n");
+ return true;
+}
+
+void GCNSchedStage::finalizeGCNSchedStage() {
+ DAG.finishBlock();
+ LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
+}
+
+void UnclusteredRescheduleStage::finalizeGCNSchedStage() {
+ SavedMutations.swap(DAG.Mutations);
+
+ GCNSchedStage::finalizeGCNSchedStage();
+}
+
+bool GCNSchedStage::initGCNRegion() {
+ // Check whether this new region is also a new block.
+ if (DAG.RegionBegin->getParent() != CurrentMBB)
+ setupNewBlock();
+
+ unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
+ DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
+ << " " << CurrentMBB->getName()
+ << "\n From: " << *DAG.begin() << " To: ";
+ if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+
+ // Save original instruction order before scheduling for possible revert.
+ Unsched.clear();
+ Unsched.reserve(DAG.NumRegionInstrs);
+ for (auto &I : DAG)
+ Unsched.push_back(&I);
+
+ PressureBefore = DAG.Pressure[RegionIdx];
+
+ LLVM_DEBUG(
+ dbgs() << "Pressure before scheduling:\nRegion live-ins:";
+ GCNRPTracker::printLiveRegs(dbgs(), DAG.LiveIns[RegionIdx], DAG.MRI);
+ dbgs() << "Region live-in pressure: ";
+ llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]).print(dbgs());
+ dbgs() << "Region register pressure: "; PressureBefore.print(dbgs()));
+
+ // Set HasClusteredNodes to true for late stages where we have already
+ // collected it. That way pickNode() will not scan SDeps when not needed.
+ S.HasClusteredNodes = StageID > GCNSchedStageID::InitialSchedule;
+ S.HasExcessPressure = false;
+
+ return true;
+}
+
+bool UnclusteredRescheduleStage::initGCNRegion() {
+ if (!DAG.RescheduleRegions[RegionIdx])
+ return false;
+
+ return GCNSchedStage::initGCNRegion();
+}
+
+bool ClusteredLowOccStage::initGCNRegion() {
+ // We may need to reschedule this region if it doesn't have clusters (so it
+ // wasn't rescheduled in the last stage), or if we found it was testing
+ // critical register pressure limits in the unclustered reschedule stage. The
+ // latter is because we may not have been able to raise the min occupancy in
+ // the previous stage, so the region may be overly constrained even if it was
+ // already rescheduled.
+ if (!DAG.RegionsWithClusters[RegionIdx] && !DAG.RegionsWithHighRP[RegionIdx])
+ return false;
+
+ return GCNSchedStage::initGCNRegion();
+}
+
+bool PreRARematStage::initGCNRegion() {
+ if (!DAG.RescheduleRegions[RegionIdx])
+ return false;
+
+ return GCNSchedStage::initGCNRegion();
+}
+
+void GCNSchedStage::setupNewBlock() {
+ if (CurrentMBB)
+ DAG.finishBlock();
+
+ CurrentMBB = DAG.RegionBegin->getParent();
+ DAG.startBlock(CurrentMBB);
+ // Get real RP for the region if it hasn't been calculated before. After the
+ // initial schedule stage, real RP will be collected after scheduling.
+ if (StageID == GCNSchedStageID::InitialSchedule)
+ DAG.computeBlockPressure(RegionIdx, CurrentMBB);
+}
+
+void GCNSchedStage::finalizeGCNRegion() {
+ DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
+ DAG.RescheduleRegions[RegionIdx] = false;
+ if (S.HasExcessPressure)
+ DAG.RegionsWithHighRP[RegionIdx] = true;
+
+ // Revert scheduling if we have dropped occupancy or there is some other
+ // reason that the original schedule is better.
+ checkScheduling();
+
+ DAG.exitRegion();
+ RegionIdx++;
+}
+
+void InitialScheduleStage::finalizeGCNRegion() {
+ // Record which regions have clustered nodes for the next unclustered
+ // reschedule stage.
+ assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
+ if (S.HasClusteredNodes)
+ DAG.RegionsWithClusters[RegionIdx] = true;
+
+ GCNSchedStage::finalizeGCNRegion();
+}
+
+void GCNSchedStage::checkScheduling() {
+ // Check the results of scheduling.
+ PressureAfter = DAG.getRealRegPressure(RegionIdx);
+ LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
+ PressureAfter.print(dbgs()));
+
+ if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
+ PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
+ DAG.Pressure[RegionIdx] = PressureAfter;
+ DAG.RegionsWithMinOcc[RegionIdx] =
+ PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
+
+ // Early out if we have achieved the occupancy target.
+ LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
+ return;
+ }
+
+ unsigned WavesAfter =
+ std::min(S.getTargetOccupancy(), PressureAfter.getOccupancy(ST));
+ unsigned WavesBefore =
+ std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
+ LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
+ << ", after " << WavesAfter << ".\n");
+
+ // We may not be able to keep the current target occupancy because of the
+ // just-scheduled region. We might still be able to revert scheduling if the
+ // occupancy before was higher, or if the current schedule has register
+ // pressure higher than the excess limits, which could lead to more spilling.
+ unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
+
+ // Allow memory bound functions to drop to 4 waves if not limited by an
+ // attribute.
+ if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
+ WavesAfter >= MFI.getMinAllowedOccupancy()) {
+ LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
+ << MFI.getMinAllowedOccupancy() << " waves\n");
+ NewOccupancy = WavesAfter;
+ }
+
+ if (NewOccupancy < DAG.MinOccupancy) {
+ DAG.MinOccupancy = NewOccupancy;
+ MFI.limitOccupancy(DAG.MinOccupancy);
+ DAG.RegionsWithMinOcc.reset();
+ LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
+ << DAG.MinOccupancy << ".\n");
+ }
- exitRegion();
- ++RegionIdx;
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
+ if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
+ PressureAfter.getAGPRNum() > MaxVGPRs ||
+ PressureAfter.getSGPRNum() > MaxSGPRs) {
+ DAG.RescheduleRegions[RegionIdx] = true;
+ DAG.RegionsWithHighRP[RegionIdx] = true;
+ }
+
+ // Revert if this region's schedule would cause a drop in occupancy or
+ // spilling.
+ if (shouldRevertScheduling(WavesAfter)) {
+ revertScheduling();
+ } else {
+ DAG.Pressure[RegionIdx] = PressureAfter;
+ DAG.RegionsWithMinOcc[RegionIdx] =
+ PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
+ }
+}
+
+bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
+ if (WavesAfter < DAG.MinOccupancy)
+ return true;
+
+ return false;
+}
+
+bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
+ if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
+ return true;
+
+ if (mayCauseSpilling(WavesAfter))
+ return true;
+
+ assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
+ // Don't reschedule the region in the next stage if it doesn't have clusters.
+ if (!DAG.RegionsWithClusters[RegionIdx])
+ DAG.RescheduleRegions[RegionIdx] = false;
+
+ return false;
+}
+
+bool UnclusteredRescheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
+ if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
+ return true;
+
+ // If RP is not reduced in the unclustered reschedule stage, revert to the
+ // old schedule.
+ if (!PressureAfter.less(ST, PressureBefore)) {
+ LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
+ return true;
+ }
+
+ return false;
+}
+
+bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
+ if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
+ return true;
+
+ if (mayCauseSpilling(WavesAfter))
+ return true;
+
+ return false;
+}
+
+bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
+ if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
+ return true;
+
+ if (mayCauseSpilling(WavesAfter))
+ return true;
+
+ return false;
+}
+
+bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
+ if (WavesAfter <= MFI.getMinWavesPerEU() &&
+ !PressureAfter.less(ST, PressureBefore) &&
+ DAG.RescheduleRegions[RegionIdx]) {
+ LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
+ return true;
+ }
+
+ return false;
+}
+
+void GCNSchedStage::revertScheduling() {
+ DAG.RegionsWithMinOcc[RegionIdx] =
+ PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
+ LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
+ DAG.RescheduleRegions[RegionIdx] =
+ DAG.RegionsWithClusters[RegionIdx] ||
+ (nextStage(StageID)) != GCNSchedStageID::UnclusteredReschedule;
+ DAG.RegionEnd = DAG.RegionBegin;
+ int SkippedDebugInstr = 0;
+ for (MachineInstr *MI : Unsched) {
+ if (MI->isDebugInstr()) {
+ ++SkippedDebugInstr;
+ continue;
+ }
+
+ if (MI->getIterator() != DAG.RegionEnd) {
+ DAG.BB->remove(MI);
+ DAG.BB->insert(DAG.RegionEnd, MI);
+ if (!MI->isDebugInstr())
+ DAG.LIS->handleMove(*MI, true);
+ }
+
+ // Reset read-undef flags and update them later.
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Op.setIsUndef(false);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
+ if (!MI->isDebugInstr()) {
+ if (DAG.ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *DAG.LIS);
+ }
}
- finishBlock();
+ DAG.RegionEnd = MI->getIterator();
+ ++DAG.RegionEnd;
+ LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
+ }
+
+ // After reverting the schedule, debug instrs will now be at the end of the
+ // block and RegionEnd will point to the first debug instr. Increment
+ // RegionEnd past debug instrs to the actual end of the scheduling region.
+ while (SkippedDebugInstr-- > 0)
+ ++DAG.RegionEnd;
+
+ // If Unsched.front() instruction is a debug instruction, this will actually
+ // shrink the region since we moved all debug instructions to the end of the
+ // block. Find the first instruction that is not a debug instruction.
+ DAG.RegionBegin = Unsched.front()->getIterator();
+ if (DAG.RegionBegin->isDebugInstr()) {
+ for (MachineInstr *MI : Unsched) {
+ if (MI->isDebugInstr())
+ continue;
+ DAG.RegionBegin = MI->getIterator();
+ break;
+ }
+ }
+
+ // Then move the debug instructions back into their correct place and set
+ // RegionBegin and RegionEnd if needed.
+ DAG.placeDebugValues();
- if (Stage == UnclusteredReschedule)
- SavedMutations.swap(Mutations);
- } while (Stage != LastStage);
+ DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
}
-void GCNScheduleDAGMILive::collectRematerializableInstructions() {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+void PreRARematStage::collectRematerializableInstructions() {
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
+ for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
Register Reg = Register::index2VirtReg(I);
- if (!LIS->hasInterval(Reg))
+ if (!DAG.LIS->hasInterval(Reg))
continue;
// TODO: Handle AGPR and SGPR rematerialization
- if (!SRI->isVGPRClass(MRI.getRegClass(Reg)) || !MRI.hasOneDef(Reg) ||
- !MRI.hasOneNonDBGUse(Reg))
+ if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
+ !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
continue;
- MachineOperand *Op = MRI.getOneDef(Reg);
+ MachineOperand *Op = DAG.MRI.getOneDef(Reg);
MachineInstr *Def = Op->getParent();
if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
continue;
- MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg);
+ MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
if (Def->getParent() == UseI->getParent())
continue;
@@ -744,10 +905,10 @@ void GCNScheduleDAGMILive::collectRematerializableInstructions() {
// live-through or used inside regions at MinOccupancy. This means that the
// register must be in the live-in set for the region.
bool AddedToRematList = false;
- for (unsigned I = 0, E = Regions.size(); I != E; ++I) {
- auto It = LiveIns[I].find(Reg);
- if (It != LiveIns[I].end() && !It->second.none()) {
- if (RegionsWithMinOcc[I]) {
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ auto It = DAG.LiveIns[I].find(Reg);
+ if (It != DAG.LiveIns[I].end() && !It->second.none()) {
+ if (DAG.RegionsWithMinOcc[I]) {
RematerializableInsts[I][Def] = UseI;
AddedToRematList = true;
}
@@ -762,8 +923,8 @@ void GCNScheduleDAGMILive::collectRematerializableInstructions() {
}
}
-bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
- const TargetInstrInfo *TII) {
+bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
+ const TargetInstrInfo *TII) {
// Temporary copies of cached variables we will be modifying and replacing if
// sinking succeeds.
SmallVector<
@@ -772,9 +933,10 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
DenseMap<unsigned, GCNRegPressure> NewPressure;
BitVector NewRescheduleRegions;
+ LiveIntervals *LIS = DAG.LIS;
- NewRegions.resize(Regions.size());
- NewRescheduleRegions.resize(Regions.size());
+ NewRegions.resize(DAG.Regions.size());
+ NewRescheduleRegions.resize(DAG.Regions.size());
// Collect only regions that have a rematerializable def as a live-in.
SmallSet<unsigned, 16> ImpactedRegions;
@@ -784,16 +946,16 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// Make copies of register pressure and live-ins cache that will be updated
// as we rematerialize.
for (auto Idx : ImpactedRegions) {
- NewPressure[Idx] = Pressure[Idx];
- NewLiveIns[Idx] = LiveIns[Idx];
+ NewPressure[Idx] = DAG.Pressure[Idx];
+ NewLiveIns[Idx] = DAG.LiveIns[Idx];
}
- NewRegions = Regions;
+ NewRegions = DAG.Regions;
NewRescheduleRegions.reset();
DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
bool Improved = false;
for (auto I : ImpactedRegions) {
- if (!RegionsWithMinOcc[I])
+ if (!DAG.RegionsWithMinOcc[I])
continue;
Improved = false;
@@ -802,12 +964,12 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// TODO: Handle occupancy drop due to AGPR and SGPR.
// Check if cause of occupancy drop is due to VGPR usage and not SGPR.
- if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == MinOccupancy)
+ if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
break;
// The occupancy of this region could have been improved by a previous
// iteration's sinking of defs.
- if (NewPressure[I].getOccupancy(ST) > MinOccupancy) {
+ if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
NewRescheduleRegions[I] = true;
Improved = true;
continue;
@@ -827,7 +989,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
// If in the most optimistic scenario, we cannot improve occupancy, then do
// not attempt to sink any instructions.
- if (OptimisticOccupancy <= MinOccupancy)
+ if (OptimisticOccupancy <= DAG.MinOccupancy)
break;
unsigned ImproveOccupancy = 0;
@@ -842,7 +1004,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// call LiveRangeEdit::allUsesAvailableAt() and
// LiveRangeEdit::canRematerializeAt().
TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
- Def->getOperand(0).getSubReg(), *Def, *TRI);
+ Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
MachineInstr *NewMI = &*(--InsertPos);
LIS->InsertMachineInstrInMaps(*NewMI);
LIS->removeInterval(Reg);
@@ -851,11 +1013,11 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// Update region boundaries in the scheduling region we sank from, since we
// may sink an instruction that was at the beginning or end of its region
- updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
- /*Removing =*/true);
+ DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
+ /*Removing =*/true);
// Update region boundaries in region we sinked to.
- updateRegionBoundaries(NewRegions, InsertPos, NewMI);
+ DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
LaneBitmask PrevMask = NewLiveIns[I][Reg];
// FIXME: Also update cached pressure for where the def was sinked from.
@@ -863,9 +1025,9 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// the reg from all regions as a live-in.
for (auto Idx : RematDefToLiveInRegions[Def]) {
NewLiveIns[Idx].erase(Reg);
- if (InsertPos->getParent() != Regions[Idx].first->getParent()) {
+ if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
// Def is live-through and not used in this block.
- NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), MRI);
+ NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
} else {
// Def is used and rematerialized into this block.
GCNDownwardRPTracker RPT(*LIS);
@@ -879,7 +1041,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
SinkedDefs.push_back(Def);
ImproveOccupancy = NewPressure[I].getOccupancy(ST);
- if (ImproveOccupancy > MinOccupancy)
+ if (ImproveOccupancy > DAG.MinOccupancy)
break;
}
@@ -888,7 +1050,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
for (auto TrackedIdx : RematDefToLiveInRegions[Def])
RematerializableInsts[TrackedIdx].erase(Def);
- if (ImproveOccupancy <= MinOccupancy)
+ if (ImproveOccupancy <= DAG.MinOccupancy)
break;
NewRescheduleRegions[I] = true;
@@ -917,7 +1079,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineInstr *OldMI = Entry.second;
// Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
- BBLiveInMap.erase(OldMI);
+ DAG.BBLiveInMap.erase(OldMI);
// Remove OldMI and update LIS
Register Reg = MI->getOperand(0).getReg();
@@ -929,22 +1091,22 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// Update live-ins, register pressure, and regions caches.
for (auto Idx : ImpactedRegions) {
- LiveIns[Idx] = NewLiveIns[Idx];
- Pressure[Idx] = NewPressure[Idx];
- MBBLiveIns.erase(Regions[Idx].first->getParent());
+ DAG.LiveIns[Idx] = NewLiveIns[Idx];
+ DAG.Pressure[Idx] = NewPressure[Idx];
+ DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
}
- Regions = NewRegions;
- RescheduleRegions = NewRescheduleRegions;
+ DAG.Regions = NewRegions;
+ DAG.RescheduleRegions = NewRescheduleRegions;
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- MFI.increaseOccupancy(MF, ++MinOccupancy);
+ MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
return true;
}
// Copied from MachineLICM
-bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI) {
- if (!TII->isTriviallyReMaterializable(MI))
+bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
+ if (!DAG.TII->isTriviallyReMaterializable(MI))
return false;
for (const MachineOperand &MO : MI.operands())
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index c3db849cf81a..7aadf89e0bf7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -28,8 +28,6 @@ class GCNSubtarget;
/// heuristics to determine excess/critical pressure sets. Its goal is to
/// maximize kernel occupancy (i.e. maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
- friend class GCNScheduleDAGMILive;
-
SUnit *pickNodeBidirectional(bool &IsTopNode);
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
@@ -42,15 +40,18 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
unsigned SGPRPressure, unsigned VGPRPressure);
std::vector<unsigned> Pressure;
+
std::vector<unsigned> MaxPressure;
unsigned SGPRExcessLimit;
+
unsigned VGPRExcessLimit;
- unsigned SGPRCriticalLimit;
- unsigned VGPRCriticalLimit;
unsigned TargetOccupancy;
+ MachineFunction *MF;
+
+public:
  // schedule() has seen a clustered memory operation. Set it to false
  // before scheduling a region to know if the region had such clusters.
bool HasClusteredNodes;
@@ -59,28 +60,53 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
// register pressure for actual scheduling heuristics.
bool HasExcessPressure;
- MachineFunction *MF;
+ unsigned SGPRCriticalLimit;
+
+ unsigned VGPRCriticalLimit;
-public:
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
SUnit *pickNode(bool &IsTopNode) override;
void initialize(ScheduleDAGMI *DAG) override;
+ unsigned getTargetOccupancy() { return TargetOccupancy; }
+
void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
};
-class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
+enum class GCNSchedStageID : unsigned {
+ InitialSchedule = 0,
+ UnclusteredReschedule = 1,
+ ClusteredLowOccupancyReschedule = 2,
+ PreRARematerialize = 3,
+ LastStage = PreRARematerialize
+};
+
+#ifndef NDEBUG
+raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
+#endif
+
+inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
+ assert(Stage != GCNSchedStageID::PreRARematerialize);
+ Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
+ return Stage;
+}
+
+inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) {
+ return static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
+}
- enum : unsigned {
- Collect,
- InitialSchedule,
- UnclusteredReschedule,
- ClusteredLowOccupancyReschedule,
- PreRARematerialize,
- LastStage = PreRARematerialize
- };
+inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) {
+ return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS);
+}
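
The post-increment operator above asserts before stepping past PreRARematerialize, so a stage loop can never walk off the end of the enum. A minimal standalone sketch of how these helpers can drive such a loop (an assumed loop shape for illustration, not the actual runSchedStages body):

#include <cassert>

enum class GCNSchedStageID : unsigned {
  InitialSchedule = 0,
  UnclusteredReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  LastStage = PreRARematerialize
};

// Same quirk as above: post-increment syntax with pre-increment semantics.
inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
  assert(Stage != GCNSchedStageID::PreRARematerialize);
  Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
  return Stage;
}

int main() {
  for (GCNSchedStageID S = GCNSchedStageID::InitialSchedule;;) {
    // ... construct and run the GCNSchedStage for S here ...
    if (S == GCNSchedStageID::LastStage)
      break;
    S++; // asserts if it would step past the last stage
  }
}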
+
+class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
+ friend class GCNSchedStage;
+ friend class InitialScheduleStage;
+ friend class UnclusteredRescheduleStage;
+ friend class ClusteredLowOccStage;
+ friend class PreRARematStage;
const GCNSubtarget &ST;
@@ -92,12 +118,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
   // Minimal real occupancy recorded for the function.
unsigned MinOccupancy;
- // Scheduling stage number.
- unsigned Stage;
-
- // Current region index.
- size_t RegionIdx;
-
   // Vector of regions recorded for later rescheduling.
SmallVector<std::pair<MachineBasicBlock::iterator,
MachineBasicBlock::iterator>, 32> Regions;
@@ -121,6 +141,148 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Region pressure cache.
SmallVector<GCNRegPressure, 32> Pressure;
+ // Temporary basic block live-in cache.
+ DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
+
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
+
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+
+ // Return current region pressure.
+ GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
+
+ // Compute and cache live-ins and pressure for all regions in block.
+ void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
+
+ // Update region boundaries when removing MI or inserting NewMI before MI.
+ void updateRegionBoundaries(
+ SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
+ MachineBasicBlock::iterator>> &RegionBoundaries,
+ MachineBasicBlock::iterator MI, MachineInstr *NewMI,
+ bool Removing = false);
+
+ void runSchedStages();
+
+public:
+ GCNScheduleDAGMILive(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S);
+
+ void schedule() override;
+
+ void finalizeSchedule() override;
+};
+
+// A GCNSchedStage is one stage of the multi-stage scheduling that
+// GCNScheduleDAGMILive applies to a function.
+class GCNSchedStage {
+protected:
+ GCNScheduleDAGMILive &DAG;
+
+ GCNMaxOccupancySchedStrategy &S;
+
+ MachineFunction &MF;
+
+ SIMachineFunctionInfo &MFI;
+
+ const GCNSubtarget &ST;
+
+ const GCNSchedStageID StageID;
+
+ // The current block being scheduled.
+ MachineBasicBlock *CurrentMBB = nullptr;
+
+ // Current region index.
+ unsigned RegionIdx = 0;
+
+ // Record the original order of instructions before scheduling.
+ std::vector<MachineInstr *> Unsched;
+
+ // RP before scheduling the current region.
+ GCNRegPressure PressureBefore;
+
+ // RP after scheduling the current region.
+ GCNRegPressure PressureAfter;
+
+ GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
+
+public:
+ // Initialize state for a scheduling stage. Returns false if the current stage
+ // should be skipped.
+ virtual bool initGCNSchedStage();
+
+ // Finalize state after finishing a scheduling pass on the function.
+ virtual void finalizeGCNSchedStage();
+
+ // Setup for scheduling a region. Returns false if the current region should
+ // be skipped.
+ virtual bool initGCNRegion();
+
+ // Track whether a new region is also a new MBB.
+ void setupNewBlock();
+
+ // Finalize state after scheduling a region.
+ virtual void finalizeGCNRegion();
+
+ // Check result of scheduling.
+ void checkScheduling();
+
+ // Returns true if scheduling should be reverted.
+ virtual bool shouldRevertScheduling(unsigned WavesAfter);
+
+ // Returns true if the new schedule may result in more spilling.
+ bool mayCauseSpilling(unsigned WavesAfter);
+
+ // Attempt to revert scheduling for this region.
+ void revertScheduling();
+
+ void advanceRegion() { RegionIdx++; }
+
+ virtual ~GCNSchedStage() = default;
+};
+
+class InitialScheduleStage : public GCNSchedStage {
+public:
+ void finalizeGCNRegion() override;
+
+ bool shouldRevertScheduling(unsigned WavesAfter) override;
+
+ InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+ : GCNSchedStage(StageID, DAG) {}
+};
+
+class UnclusteredRescheduleStage : public GCNSchedStage {
+private:
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
+
+public:
+ bool initGCNSchedStage() override;
+
+ void finalizeGCNSchedStage() override;
+
+ bool initGCNRegion() override;
+
+ bool shouldRevertScheduling(unsigned WavesAfter) override;
+
+ UnclusteredRescheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+ : GCNSchedStage(StageID, DAG) {}
+};
+
+// Retry function scheduling if the resulting occupancy is lower than the
+// occupancy targeted by earlier scheduling passes. This gives more freedom
+// to schedule low-register-pressure blocks.
+class ClusteredLowOccStage : public GCNSchedStage {
+public:
+ bool initGCNSchedStage() override;
+
+ bool initGCNRegion() override;
+
+ bool shouldRevertScheduling(unsigned WavesAfter) override;
+
+ ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+ : GCNSchedStage(StageID, DAG) {}
+};
+
+class PreRARematStage : public GCNSchedStage {
+private:
   // Each region at MinOccupancy will have its own list of trivially
// rematerializable instructions we can remat to reduce RP. The list maps an
// instruction to the position we should remat before, usually the MI using
@@ -132,12 +294,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// that has the defined reg as a live-in.
DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
- // Temporary basic block live-in cache.
- DenseMap<const MachineBasicBlock*, GCNRPTracker::LiveRegSet> MBBLiveIns;
-
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
-
// Collect all trivially rematerializable VGPR instructions with a single def
// and single use outside the defining block into RematerializableInsts.
void collectRematerializableInstructions();
@@ -150,26 +306,15 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
const TargetInstrInfo *TII);
- // Return current region pressure.
- GCNRegPressure getRealRegPressure() const;
-
- // Compute and cache live-ins and pressure for all regions in block.
- void computeBlockPressure(const MachineBasicBlock *MBB);
-
- // Update region boundaries when removing MI or inserting NewMI before MI.
- void updateRegionBoundaries(
- SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
- MachineBasicBlock::iterator>> &RegionBoundaries,
- MachineBasicBlock::iterator MI, MachineInstr *NewMI,
- bool Removing = false);
-
public:
- GCNScheduleDAGMILive(MachineSchedContext *C,
- std::unique_ptr<MachineSchedStrategy> S);
+ bool initGCNSchedStage() override;
- void schedule() override;
+ bool initGCNRegion() override;
- void finalizeSchedule() override;
+ bool shouldRevertScheduling(unsigned WavesAfter) override;
+
+ PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+ : GCNSchedStage(StageID, DAG) {}
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index e093d78b2cc6..d9d7d4efa8c3 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -309,6 +309,11 @@ uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const {
return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2;
}
+static bool isVCMPX64(const MCInstrDesc &Desc) {
+ return (Desc.TSFlags & SIInstrFlags::VOP3) &&
+ Desc.hasImplicitDefOfPhysReg(AMDGPU::EXEC);
+}
+
void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -326,6 +331,17 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
Encoding |= getImplicitOpSelHiEncoding(Opcode);
}
+ // GFX11 v_cmpx opcodes promoted to VOP3 have an implied dst=EXEC.
+ // The documentation requires dst to be encoded as EXEC (0x7E),
+ // but it looks like the actual value encoded for the dst operand
+ // is ignored by HW. It was decided to define dst as "do not care"
+ // in the td files so that the disassembler accepts any dst value.
+ // However, dst is encoded as EXEC for compatibility with SP3.
+ if (AMDGPU::isGFX11Plus(STI) && isVCMPX64(Desc)) {
+ assert((Encoding & 0xFF) == 0);
+ Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO);
+ }
+
for (unsigned i = 0; i < bytes; i++) {
OS.write((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i));
}
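
The low byte of the VOP3 encoding holds the dst operand, so the patch asserts it starts out zero and ORs in the EXEC_LO encoding before the bytes go out least-significant first. A self-contained sketch of that byte patch (the concrete encoding value is made up, and a plain uint64_t stands in for the APInt-backed Encoding; 0x7E matches the EXEC value named in the comment above):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Encoding = 0x7D00000000000000ull; // hypothetical v_cmpx VOP3 bits
  assert((Encoding & 0xFF) == 0);            // dst field must start out empty
  Encoding |= 0x7E;                          // encode dst as EXEC (0x7E)
  for (unsigned i = 0; i < 8; i++)           // emit least-significant byte first
    std::printf("%02x ", (uint8_t)(Encoding >> (8 * i)));
  std::printf("\n");
}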
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index e7706fa0ef5c..1ed79add64c9 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -54,8 +54,8 @@ public:
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;
- virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
- bool LegalOperations) const override {
+ bool canCombineTruncStore(EVT ValVT, EVT MemVT,
+ bool LegalOperations) const override {
// R600 has "custom" lowering for truncating stores despite not supporting
// those instructions. If we allow that custom lowering in the DAG combiner
// then all truncates are merged into truncating stores, giving worse code
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 438e8b200ecc..f7d139adc63b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2132,7 +2132,8 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
SIMachineFunctionInfo &Info,
CallingConv::ID CallConv,
bool IsShader) const {
- if (Subtarget->hasUserSGPRInit16Bug()) {
+ if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
+ // Note: user SGPRs are handled by the front-end for graphics shaders.
// Pad up the used user SGPRs with dead inputs.
unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
@@ -2195,7 +2196,8 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
- assert(!Subtarget->hasUserSGPRInit16Bug() || Info.getNumPreloadedSGPRs() >= 16);
+ assert(!Subtarget->hasUserSGPRInit16Bug() || IsShader ||
+ Info.getNumPreloadedSGPRs() >= 16);
}
static void reservePrivateMemoryRegs(const TargetMachine &TM,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index d1fecc1afc7f..e0101f53880f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -487,10 +487,10 @@ public:
AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
- virtual const TargetRegisterClass *
- getRegClassFor(MVT VT, bool isDivergent) const override;
- virtual bool requiresUniformRegister(MachineFunction &MF,
- const Value *V) const override;
+ const TargetRegisterClass *getRegClassFor(MVT VT,
+ bool isDivergent) const override;
+ bool requiresUniformRegister(MachineFunction &MF,
+ const Value *V) const override;
Align getPrefLoopAlignment(MachineLoop *ML) const override;
void allocateHSAUserSGPRs(CCState &CCInfo,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index ffe8dce79816..fccb08f86e6d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -349,7 +349,7 @@ def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "SGPR%u_LO16", 0, 105))> {
- let AllocationPriority = 9;
+ let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
let HasSGPR = 1;
@@ -368,7 +368,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 105))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
- let AllocationPriority = 9;
+ let AllocationPriority = 0;
let GeneratePressureSet = 0;
let HasSGPR = 1;
}
@@ -528,14 +528,14 @@ def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
let HasVGPR = 1 in {
def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_LO16", 0, 255))> {
- let AllocationPriority = 1;
+ let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
}
def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_HI16", 0, 255))> {
- let AllocationPriority = 1;
+ let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
}
@@ -544,7 +544,7 @@ def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
// i16/f16 only on VI+
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
- let AllocationPriority = 1;
+ let AllocationPriority = 0;
let Size = 32;
let Weight = 1;
}
@@ -588,7 +588,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
// AccVGPR 32-bit registers
def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
- let AllocationPriority = 1;
+ let AllocationPriority = 0;
let Size = 32;
let Weight = 1;
}
@@ -653,7 +653,7 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE,
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
@@ -663,42 +663,42 @@ def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
let Size = 16;
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
def SReg_LO16_XEXEC_HI : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> {
let Size = 16;
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
def SReg_LO16_XM0 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> {
let Size = 16;
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> {
let Size = 16;
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
}
} // End GeneratePressureSet = 0
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
- let AllocationPriority = 10;
+ let AllocationPriority = 0;
let HasSGPR = 1;
}
@@ -712,7 +712,7 @@ def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16],
def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
(add SGPR_64Regs)> {
let CopyCost = 1;
- let AllocationPriority = 11;
+ let AllocationPriority = 1;
let HasSGPR = 1;
}
@@ -725,14 +725,14 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
- let AllocationPriority = 13;
+ let AllocationPriority = 1;
let HasSGPR = 1;
}
def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
- let AllocationPriority = 13;
+ let AllocationPriority = 1;
let HasSGPR = 1;
}
@@ -750,7 +750,7 @@ def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
let HasSGPR = 1;
}
-multiclass SRegClass<int numRegs, int priority,
+multiclass SRegClass<int numRegs,
list<ValueType> regTypes,
SIRegisterTuples regList,
SIRegisterTuples ttmpList = regList,
@@ -760,7 +760,7 @@ multiclass SRegClass<int numRegs, int priority,
defvar sgprName = !strconcat("SGPR_", suffix);
defvar ttmpName = !strconcat("TTMP_", suffix);
- let AllocationPriority = priority, CopyCost = copyCost, HasSGPR = 1 in {
+ let AllocationPriority = !sub(numRegs, 1), CopyCost = copyCost, HasSGPR = 1 in {
def "" # sgprName : SIRegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
}
@@ -781,14 +781,14 @@ multiclass SRegClass<int numRegs, int priority,
}
}
-defm "" : SRegClass<3, 14, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
-defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
-defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
-defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
-defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
-defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
-defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
+defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
+defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
+defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
+defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
+defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
+defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
+defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
@@ -803,7 +803,7 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
// Requires n v_mov_b32 to copy
let CopyCost = numRegs;
- let AllocationPriority = numRegs;
+ let AllocationPriority = !sub(numRegs, 1);
let Weight = numRegs;
}
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index d489a089ac78..5973d32c91d6 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -718,7 +718,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
// DPP8 forbids modifiers and can inherit from VOPC_Profile
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- dag InsPartVOP3DPP = (ins Src0Mod:$src0_modifiers, VGPRSrc_32:$src0, VGPRSrc_32:$src1);
+ dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VGPRSrc_32:$src1);
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
(ins)));
let Asm64 = "$sdst, $src0_modifiers, $src1";
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 9acd49292268..f81495985405 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -139,6 +139,9 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
+ uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const override;
+
private:
DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -739,6 +742,33 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
}
}
+uint64_t ARMDisassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
+ // In Arm state, instructions are always 4 bytes wide, so there's no
+ // point in skipping any smaller number of bytes if an instruction
+ // can't be decoded.
+ if (!STI.getFeatureBits()[ARM::ModeThumb])
+ return 4;
+
+ // In a Thumb instruction stream, a halfword is a standalone 2-byte
+ // instruction if and only if its value is less than 0xE800.
+ // Otherwise, it's the first halfword of a 4-byte instruction.
+ //
+ // So, if we can see the upcoming halfword, we can judge on that
+ // basis, and maybe skip a whole 4-byte instruction that we don't
+ // know how to decode, without accidentally trying to interpret its
+ // second half as something else.
+ //
+ // If we don't have the instruction data available, we just have to
+ // recommend skipping the minimum sensible distance, which is 2
+ // bytes.
+ if (Bytes.size() < 2)
+ return 2;
+
+ uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+ return Insn16 < 0xE800 ? 2 : 4;
+}
+
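The 0xE800 threshold can be checked in isolation: any halfword below it is a complete 2-byte Thumb instruction, anything at or above it is the first half of a 4-byte one. A standalone sketch of the rule (the sample encodings are well-known Thumb instructions):

#include <cstdint>
#include <cstdio>

// Width of the Thumb instruction starting with halfword Insn16.
static unsigned thumbInsnWidth(uint16_t Insn16) {
  return Insn16 < 0xE800 ? 2 : 4;
}

int main() {
  std::printf("%u\n", thumbInsnWidth(0x4770)); // bx lr     -> 2 bytes
  std::printf("%u\n", thumbInsnWidth(0xF000)); // BL prefix -> 4 bytes
}
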
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
new file mode 100644
index 000000000000..1985bee8e0ae
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -0,0 +1,324 @@
+//===- DXILOpBuilder.cpp - Helper class for building DXILOp functions -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains a class to help build DXIL op functions.
+//===----------------------------------------------------------------------===//
+
+#include "DXILOpBuilder.h"
+#include "DXILConstants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/DXILOperationCommon.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace llvm::DXIL;
+
+constexpr StringLiteral DXILOpNamePrefix = "dx.op.";
+
+namespace {
+
+enum OverloadKind : uint16_t {
+ VOID = 1,
+ HALF = 1 << 1,
+ FLOAT = 1 << 2,
+ DOUBLE = 1 << 3,
+ I1 = 1 << 4,
+ I8 = 1 << 5,
+ I16 = 1 << 6,
+ I32 = 1 << 7,
+ I64 = 1 << 8,
+ UserDefineType = 1 << 9,
+ ObjectType = 1 << 10,
+};
+
+} // namespace
+
+static const char *getOverloadTypeName(OverloadKind Kind) {
+ switch (Kind) {
+ case OverloadKind::HALF:
+ return "f16";
+ case OverloadKind::FLOAT:
+ return "f32";
+ case OverloadKind::DOUBLE:
+ return "f64";
+ case OverloadKind::I1:
+ return "i1";
+ case OverloadKind::I8:
+ return "i8";
+ case OverloadKind::I16:
+ return "i16";
+ case OverloadKind::I32:
+ return "i32";
+ case OverloadKind::I64:
+ return "i64";
+ case OverloadKind::VOID:
+ case OverloadKind::ObjectType:
+ case OverloadKind::UserDefineType:
+ break;
+ }
+ llvm_unreachable("invalid overload type for name");
+ return "void";
+}
+
+static OverloadKind getOverloadKind(Type *Ty) {
+ Type::TypeID T = Ty->getTypeID();
+ switch (T) {
+ case Type::VoidTyID:
+ return OverloadKind::VOID;
+ case Type::HalfTyID:
+ return OverloadKind::HALF;
+ case Type::FloatTyID:
+ return OverloadKind::FLOAT;
+ case Type::DoubleTyID:
+ return OverloadKind::DOUBLE;
+ case Type::IntegerTyID: {
+ IntegerType *ITy = cast<IntegerType>(Ty);
+ unsigned Bits = ITy->getBitWidth();
+ switch (Bits) {
+ case 1:
+ return OverloadKind::I1;
+ case 8:
+ return OverloadKind::I8;
+ case 16:
+ return OverloadKind::I16;
+ case 32:
+ return OverloadKind::I32;
+ case 64:
+ return OverloadKind::I64;
+ default:
+ llvm_unreachable("invalid overload type");
+ return OverloadKind::VOID;
+ }
+ }
+ case Type::PointerTyID:
+ return OverloadKind::UserDefineType;
+ case Type::StructTyID:
+ return OverloadKind::ObjectType;
+ default:
+ llvm_unreachable("invalid overload type");
+ return OverloadKind::VOID;
+ }
+}
+
+static std::string getTypeName(OverloadKind Kind, Type *Ty) {
+ if (Kind < OverloadKind::UserDefineType) {
+ return getOverloadTypeName(Kind);
+ } else if (Kind == OverloadKind::UserDefineType) {
+ StructType *ST = cast<StructType>(Ty);
+ return ST->getStructName().str();
+ } else if (Kind == OverloadKind::ObjectType) {
+ StructType *ST = cast<StructType>(Ty);
+ return ST->getStructName().str();
+ } else {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ Ty->print(OS);
+ return OS.str();
+ }
+}
+
+// Static properties.
+struct OpCodeProperty {
+ DXIL::OpCode OpCode;
+ // Offset in DXILOpCodeNameTable.
+ unsigned OpCodeNameOffset;
+ DXIL::OpCodeClass OpCodeClass;
+ // Offset in DXILOpCodeClassNameTable.
+ unsigned OpCodeClassNameOffset;
+ uint16_t OverloadTys;
+ llvm::Attribute::AttrKind FuncAttr;
+ int OverloadParamIndex; // Parameter index which controls the overload.
+ // When < 0, there should be only 1 overload type.
+ unsigned NumOfParameters; // Number of parameters, including the return value.
+ unsigned ParameterTableOffset; // Offset in ParameterTable.
+};
+
+// Include getOpCodeClassName, getOpCodeProperty, getOpCodeName and
+// getOpCodeParameterKind, which are generated by TableGen.
+#define DXIL_OP_OPERATION_TABLE
+#include "DXILOperation.inc"
+#undef DXIL_OP_OPERATION_TABLE
+
+static std::string constructOverloadName(OverloadKind Kind, Type *Ty,
+ const OpCodeProperty &Prop) {
+ if (Kind == OverloadKind::VOID) {
+ return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop)).str();
+ }
+ return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop) + "." +
+ getTypeName(Kind, Ty))
+ .str();
+}
+
+static std::string constructOverloadTypeName(OverloadKind Kind,
+ StringRef TypeName) {
+ if (Kind == OverloadKind::VOID)
+ return TypeName.str();
+
+ assert(Kind < OverloadKind::UserDefineType && "invalid overload kind");
+ return (Twine(TypeName) + getOverloadTypeName(Kind)).str();
+}
+
+static StructType *getOrCreateStructType(StringRef Name,
+ ArrayRef<Type *> EltTys,
+ LLVMContext &Ctx) {
+ StructType *ST = StructType::getTypeByName(Ctx, Name);
+ if (ST)
+ return ST;
+
+ return StructType::create(Ctx, EltTys, Name);
+}
+
+static StructType *getResRetType(Type *OverloadTy, LLVMContext &Ctx) {
+ OverloadKind Kind = getOverloadKind(OverloadTy);
+ std::string TypeName = constructOverloadTypeName(Kind, "dx.types.ResRet.");
+ Type *FieldTypes[5] = {OverloadTy, OverloadTy, OverloadTy, OverloadTy,
+ Type::getInt32Ty(Ctx)};
+ return getOrCreateStructType(TypeName, FieldTypes, Ctx);
+}
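+
For a float overload, the helpers above produce the struct name "dx.types.ResRet.f32", wrapping four f32 payload fields plus the trailing i32 status field. A trivial standalone sketch of the name construction (string handling only; the actual IR struct needs an LLVMContext):

#include <cstdio>
#include <string>

int main() {
  // constructOverloadTypeName(OverloadKind::FLOAT, "dx.types.ResRet.")
  std::string TypeName = std::string("dx.types.ResRet.") + "f32";
  std::printf("%s\n", TypeName.c_str()); // prints: dx.types.ResRet.f32
}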
+
+static StructType *getHandleType(LLVMContext &Ctx) {
+ return getOrCreateStructType("dx.types.Handle", Type::getInt8PtrTy(Ctx), Ctx);
+}
+
+static Type *getTypeFromParameterKind(ParameterKind Kind, Type *OverloadTy) {
+ auto &Ctx = OverloadTy->getContext();
+ switch (Kind) {
+ case ParameterKind::VOID:
+ return Type::getVoidTy(Ctx);
+ case ParameterKind::HALF:
+ return Type::getHalfTy(Ctx);
+ case ParameterKind::FLOAT:
+ return Type::getFloatTy(Ctx);
+ case ParameterKind::DOUBLE:
+ return Type::getDoubleTy(Ctx);
+ case ParameterKind::I1:
+ return Type::getInt1Ty(Ctx);
+ case ParameterKind::I8:
+ return Type::getInt8Ty(Ctx);
+ case ParameterKind::I16:
+ return Type::getInt16Ty(Ctx);
+ case ParameterKind::I32:
+ return Type::getInt32Ty(Ctx);
+ case ParameterKind::I64:
+ return Type::getInt64Ty(Ctx);
+ case ParameterKind::OVERLOAD:
+ return OverloadTy;
+ case ParameterKind::RESOURCE_RET:
+ return getResRetType(OverloadTy, Ctx);
+ case ParameterKind::DXIL_HANDLE:
+ return getHandleType(Ctx);
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid parameter kind");
+ return nullptr;
+}
+
+static FunctionType *getDXILOpFunctionType(const OpCodeProperty *Prop,
+ Type *OverloadTy) {
+ SmallVector<Type *> ArgTys;
+
+ auto ParamKinds = getOpCodeParameterKind(*Prop);
+
+ for (unsigned I = 0; I < Prop->NumOfParameters; ++I) {
+ ParameterKind Kind = ParamKinds[I];
+ ArgTys.emplace_back(getTypeFromParameterKind(Kind, OverloadTy));
+ }
+ return FunctionType::get(
+ ArgTys[0], ArrayRef<Type *>(&ArgTys[1], ArgTys.size() - 1), false);
+}
+
+static FunctionCallee getOrCreateDXILOpFunction(DXIL::OpCode DXILOp,
+ Type *OverloadTy, Module &M) {
+ const OpCodeProperty *Prop = getOpCodeProperty(DXILOp);
+
+ OverloadKind Kind = getOverloadKind(OverloadTy);
+ // FIXME: Find the issue and report the error in clang instead of checking
+ // it in the backend.
+ if ((Prop->OverloadTys & (uint16_t)Kind) == 0) {
+ llvm_unreachable("invalid overload");
+ }
+
+ std::string FnName = constructOverloadName(Kind, OverloadTy, *Prop);
+ // Rely on the mangled name to deduplicate.
+ if (auto *Fn = M.getFunction(FnName))
+ return FunctionCallee(Fn);
+
+ FunctionType *DXILOpFT = getDXILOpFunctionType(Prop, OverloadTy);
+ return M.getOrInsertFunction(FnName, DXILOpFT);
+}
+
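OverloadTys is a bitmask of permitted OverloadKind flags, which is what the (Prop->OverloadTys & (uint16_t)Kind) == 0 test above rejects. A standalone sketch with an assumed mask for an op that allows only half and float overloads:

#include <cstdint>
#include <cstdio>

enum OverloadKind : uint16_t { HALF = 1 << 1, FLOAT = 1 << 2, DOUBLE = 1 << 3 };

int main() {
  uint16_t OverloadTys = HALF | FLOAT; // hypothetical per-opcode mask
  std::printf("%d\n", (OverloadTys & (uint16_t)FLOAT) != 0);  // 1: allowed
  std::printf("%d\n", (OverloadTys & (uint16_t)DOUBLE) != 0); // 0: rejected
}
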
+namespace llvm {
+namespace DXIL {
+
+CallInst *DXILOpBuilder::createDXILOpCall(DXIL::OpCode OpCode, Type *OverloadTy,
+ llvm::iterator_range<Use *> Args) {
+ auto Fn = getOrCreateDXILOpFunction(OpCode, OverloadTy, M);
+ SmallVector<Value *> FullArgs;
+ FullArgs.emplace_back(B.getInt32((int32_t)OpCode));
+ FullArgs.append(Args.begin(), Args.end());
+ return B.CreateCall(Fn, FullArgs);
+}
+
+Type *DXILOpBuilder::getOverloadTy(DXIL::OpCode OpCode, FunctionType *FT,
+ bool NoOpCodeParam) {
+
+ const OpCodeProperty *Prop = getOpCodeProperty(OpCode);
+ if (Prop->OverloadParamIndex < 0) {
+ auto &Ctx = FT->getContext();
+ // When there is only one overload type, just return it.
+ switch (Prop->OverloadTys) {
+ case OverloadKind::VOID:
+ return Type::getVoidTy(Ctx);
+ case OverloadKind::HALF:
+ return Type::getHalfTy(Ctx);
+ case OverloadKind::FLOAT:
+ return Type::getFloatTy(Ctx);
+ case OverloadKind::DOUBLE:
+ return Type::getDoubleTy(Ctx);
+ case OverloadKind::I1:
+ return Type::getInt1Ty(Ctx);
+ case OverloadKind::I8:
+ return Type::getInt8Ty(Ctx);
+ case OverloadKind::I16:
+ return Type::getInt16Ty(Ctx);
+ case OverloadKind::I32:
+ return Type::getInt32Ty(Ctx);
+ case OverloadKind::I64:
+ return Type::getInt64Ty(Ctx);
+ default:
+ llvm_unreachable("invalid overload type");
+ return nullptr;
+ }
+ }
+
+ // If Prop->OverloadParamIndex is 0, the overload type is FT->getReturnType().
+ Type *OverloadType = FT->getReturnType();
+ if (Prop->OverloadParamIndex != 0) {
+ // Skip the return type and the type for the DXIL opcode.
+ const unsigned SkippedParam = NoOpCodeParam ? 2 : 1;
+ OverloadType = FT->getParamType(Prop->OverloadParamIndex - SkippedParam);
+ }
+
+ auto ParamKinds = getOpCodeParameterKind(*Prop);
+ auto Kind = ParamKinds[Prop->OverloadParamIndex];
+ // For ResRet and CBufferRet, OverloadTy is in field of StructType.
+ if (Kind == ParameterKind::CBUFFER_RET ||
+ Kind == ParameterKind::RESOURCE_RET) {
+ auto *ST = cast<StructType>(OverloadType);
+ OverloadType = ST->getElementType(0);
+ }
+ return OverloadType;
+}
+
+const char *DXILOpBuilder::getOpCodeName(DXIL::OpCode DXILOp) {
+ return ::getOpCodeName(DXILOp);
+}
+} // namespace DXIL
+} // namespace llvm
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
new file mode 100644
index 000000000000..0cc39e845b71
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -0,0 +1,46 @@
+//===- DXILOpBuilder.h - Helper class for building DXILOp functions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains a class to help build DXIL op functions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_DXILOPBUILDER_H
+#define LLVM_LIB_TARGET_DIRECTX_DXILOPBUILDER_H
+
+#include "DXILConstants.h"
+#include "llvm/ADT/iterator_range.h"
+
+namespace llvm {
+class Module;
+class IRBuilderBase;
+class CallInst;
+class Value;
+class Type;
+class FunctionType;
+class Use;
+
+namespace DXIL {
+
+class DXILOpBuilder {
+public:
+ DXILOpBuilder(Module &M, IRBuilderBase &B) : M(M), B(B) {}
+ CallInst *createDXILOpCall(DXIL::OpCode OpCode, Type *OverloadTy,
+ llvm::iterator_range<Use *> Args);
+ Type *getOverloadTy(DXIL::OpCode OpCode, FunctionType *FT,
+ bool NoOpCodeParam);
+ static const char *getOpCodeName(DXIL::OpCode DXILOp);
+
+private:
+ Module &M;
+ IRBuilderBase &B;
+};
+
+} // namespace DXIL
+} // namespace llvm
+
+#endif
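
A hedged usage sketch of the new builder, mirroring the lowerIntrinsic change below. It compiles only inside the DirectX backend (DXILOpBuilder.h is a private header), and DXIL::OpCode::Sin is assumed here purely as an example opcode:

// M is a Module, F the intrinsic Function being lowered, CI one of its
// call sites. OpCode::Sin is an assumed example opcode.
IRBuilder<> B(M.getContext());
DXILOpBuilder DXILB(M, B);
Type *OverloadTy = DXILB.getOverloadTy(DXIL::OpCode::Sin, F.getFunctionType(),
                                       /*NoOpCodeParam=*/true);
B.SetInsertPoint(CI);
CallInst *NewCI = DXILB.createDXILOpCall(DXIL::OpCode::Sin, OverloadTy,
                                         CI->args());
CI->replaceAllUsesWith(NewCI);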
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 11b89e4ec890..20c08f47745d 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "DXILConstants.h"
+#include "DXILOpBuilder.h"
#include "DirectX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
@@ -28,168 +29,12 @@
using namespace llvm;
using namespace llvm::DXIL;
-constexpr StringLiteral DXILOpNamePrefix = "dx.op.";
-
-enum OverloadKind : uint16_t {
- VOID = 1,
- HALF = 1 << 1,
- FLOAT = 1 << 2,
- DOUBLE = 1 << 3,
- I1 = 1 << 4,
- I8 = 1 << 5,
- I16 = 1 << 6,
- I32 = 1 << 7,
- I64 = 1 << 8,
- UserDefineType = 1 << 9,
- ObjectType = 1 << 10,
-};
-
-static const char *getOverloadTypeName(OverloadKind Kind) {
- switch (Kind) {
- case OverloadKind::HALF:
- return "f16";
- case OverloadKind::FLOAT:
- return "f32";
- case OverloadKind::DOUBLE:
- return "f64";
- case OverloadKind::I1:
- return "i1";
- case OverloadKind::I8:
- return "i8";
- case OverloadKind::I16:
- return "i16";
- case OverloadKind::I32:
- return "i32";
- case OverloadKind::I64:
- return "i64";
- case OverloadKind::VOID:
- case OverloadKind::ObjectType:
- case OverloadKind::UserDefineType:
- break;
- }
- llvm_unreachable("invalid overload type for name");
- return "void";
-}
-
-static OverloadKind getOverloadKind(Type *Ty) {
- Type::TypeID T = Ty->getTypeID();
- switch (T) {
- case Type::VoidTyID:
- return OverloadKind::VOID;
- case Type::HalfTyID:
- return OverloadKind::HALF;
- case Type::FloatTyID:
- return OverloadKind::FLOAT;
- case Type::DoubleTyID:
- return OverloadKind::DOUBLE;
- case Type::IntegerTyID: {
- IntegerType *ITy = cast<IntegerType>(Ty);
- unsigned Bits = ITy->getBitWidth();
- switch (Bits) {
- case 1:
- return OverloadKind::I1;
- case 8:
- return OverloadKind::I8;
- case 16:
- return OverloadKind::I16;
- case 32:
- return OverloadKind::I32;
- case 64:
- return OverloadKind::I64;
- default:
- llvm_unreachable("invalid overload type");
- return OverloadKind::VOID;
- }
- }
- case Type::PointerTyID:
- return OverloadKind::UserDefineType;
- case Type::StructTyID:
- return OverloadKind::ObjectType;
- default:
- llvm_unreachable("invalid overload type");
- return OverloadKind::VOID;
- }
-}
-
-static std::string getTypeName(OverloadKind Kind, Type *Ty) {
- if (Kind < OverloadKind::UserDefineType) {
- return getOverloadTypeName(Kind);
- } else if (Kind == OverloadKind::UserDefineType) {
- StructType *ST = cast<StructType>(Ty);
- return ST->getStructName().str();
- } else if (Kind == OverloadKind::ObjectType) {
- StructType *ST = cast<StructType>(Ty);
- return ST->getStructName().str();
- } else {
- std::string Str;
- raw_string_ostream OS(Str);
- Ty->print(OS);
- return OS.str();
- }
-}
-
-// Static properties.
-struct OpCodeProperty {
- DXIL::OpCode OpCode;
- // Offset in DXILOpCodeNameTable.
- unsigned OpCodeNameOffset;
- DXIL::OpCodeClass OpCodeClass;
- // Offset in DXILOpCodeClassNameTable.
- unsigned OpCodeClassNameOffset;
- uint16_t OverloadTys;
- llvm::Attribute::AttrKind FuncAttr;
-};
-
-// Include getOpCodeClassName getOpCodeProperty and getOpCodeName which
-// generated by tableGen.
-#define DXIL_OP_OPERATION_TABLE
-#include "DXILOperation.inc"
-#undef DXIL_OP_OPERATION_TABLE
-
-static std::string constructOverloadName(OverloadKind Kind, Type *Ty,
- const OpCodeProperty &Prop) {
- if (Kind == OverloadKind::VOID) {
- return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop)).str();
- }
- return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop) + "." +
- getTypeName(Kind, Ty))
- .str();
-}
-
-static FunctionCallee createDXILOpFunction(DXIL::OpCode DXILOp, Function &F,
- Module &M) {
- const OpCodeProperty *Prop = getOpCodeProperty(DXILOp);
-
- // Get return type as overload type for DXILOp.
- // Only simple mapping case here, so return type is good enough.
- Type *OverloadTy = F.getReturnType();
-
- OverloadKind Kind = getOverloadKind(OverloadTy);
- // FIXME: find the issue and report error in clang instead of check it in
- // backend.
- if ((Prop->OverloadTys & (uint16_t)Kind) == 0) {
- llvm_unreachable("invalid overload");
- }
-
- std::string FnName = constructOverloadName(Kind, OverloadTy, *Prop);
- assert(!M.getFunction(FnName) && "Function already exists");
-
- auto &Ctx = M.getContext();
- Type *OpCodeTy = Type::getInt32Ty(Ctx);
-
- SmallVector<Type *> ArgTypes;
- // DXIL has i32 opcode as first arg.
- ArgTypes.emplace_back(OpCodeTy);
- FunctionType *FT = F.getFunctionType();
- ArgTypes.append(FT->param_begin(), FT->param_end());
- FunctionType *DXILOpFT = FunctionType::get(OverloadTy, ArgTypes, false);
- return M.getOrInsertFunction(FnName, DXILOpFT);
-}
-
static void lowerIntrinsic(DXIL::OpCode DXILOp, Function &F, Module &M) {
- auto DXILOpFn = createDXILOpFunction(DXILOp, F, M);
IRBuilder<> B(M.getContext());
Value *DXILOpArg = B.getInt32(static_cast<unsigned>(DXILOp));
+ DXILOpBuilder DXILB(M, B);
+ Type *OverloadTy =
+ DXILB.getOverloadTy(DXILOp, F.getFunctionType(), /*NoOpCodeParam*/ true);
for (User *U : make_early_inc_range(F.users())) {
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
@@ -199,8 +44,8 @@ static void lowerIntrinsic(DXIL::OpCode DXILOp, Function &F, Module &M) {
Args.emplace_back(DXILOpArg);
Args.append(CI->arg_begin(), CI->arg_end());
B.SetInsertPoint(CI);
- CallInst *DXILCI = B.CreateCall(DXILOpFn, Args);
- LLVM_DEBUG(DXILCI->setName(getOpCodeName(DXILOp)));
+ CallInst *DXILCI = DXILB.createDXILOpCall(DXILOp, OverloadTy, CI->args());
+
CI->replaceAllUsesWith(DXILCI);
CI->eraseFromParent();
}
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
index e2a41515de38..a873662f730d 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
@@ -260,9 +260,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
return LU->getOperandNo() > RU->getOperandNo();
});
- if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
- return L.second < R.second;
- }))
+ if (llvm::is_sorted(List, llvm::less_second()))
// Order is already correct.
return;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 1e50385a7b4b..505c90f66f43 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -95,7 +95,6 @@ public:
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
void SelectStore(SDNode *N);
void SelectSHL(SDNode *N);
- void SelectZeroExtend(SDNode *N);
void SelectIntrinsicWChain(SDNode *N);
void SelectIntrinsicWOChain(SDNode *N);
void SelectConstant(SDNode *N);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 9561dfe8a35d..1dc6a4cb9c89 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -107,9 +107,6 @@ class HexagonTargetLowering : public TargetLowering {
const HexagonTargetMachine &HTM;
const HexagonSubtarget &Subtarget;
- bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
- const;
-
public:
explicit HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST);
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index c8e6276aa4de..b8671f26d124 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -2253,15 +2253,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
}
-bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
- if (MI.mayLoadOrStore() || MI.isCompare())
- return true;
-
- // Multiply
- unsigned SchedClass = MI.getDesc().getSchedClass();
- return is_TC4x(SchedClass) || is_TC3x(SchedClass);
-}
-
bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
return (Opcode == Hexagon::ENDLOOP0 ||
Opcode == Hexagon::ENDLOOP1);
@@ -2417,43 +2408,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI,
}
}
-bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
- const MachineInstr &ESMI) const {
- bool isLate = isLateResultInstr(LRMI);
- bool isEarly = isEarlySourceInstr(ESMI);
-
- LLVM_DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
- LLVM_DEBUG(LRMI.dump());
- LLVM_DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
- LLVM_DEBUG(ESMI.dump());
-
- if (isLate && isEarly) {
- LLVM_DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
- return true;
- }
-
- return false;
-}
-
-bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- case TargetOpcode::EXTRACT_SUBREG:
- case TargetOpcode::INSERT_SUBREG:
- case TargetOpcode::SUBREG_TO_REG:
- case TargetOpcode::REG_SEQUENCE:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::COPY:
- case TargetOpcode::INLINEASM:
- case TargetOpcode::PHI:
- return false;
- default:
- break;
- }
-
- unsigned SchedClass = MI.getDesc().getSchedClass();
- return !is_TC1(SchedClass);
-}
-
bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const {
// Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply
// resource, but all operands can be received late like an ALU instruction.
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 2af09c857d86..703a894132bb 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -363,7 +363,6 @@ public:
bool isDotCurInst(const MachineInstr &MI) const;
bool isDotNewInst(const MachineInstr &MI) const;
bool isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const;
- bool isEarlySourceInstr(const MachineInstr &MI) const;
bool isEndLoopN(unsigned Opcode) const;
bool isExpr(unsigned OpType) const;
bool isExtendable(const MachineInstr &MI) const;
@@ -375,9 +374,6 @@ public:
bool isIndirectL4Return(const MachineInstr &MI) const;
bool isJumpR(const MachineInstr &MI) const;
bool isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const;
- bool isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
- const MachineInstr &ESMI) const;
- bool isLateResultInstr(const MachineInstr &MI) const;
bool isLateSourceInstr(const MachineInstr &MI) const;
bool isLoopN(const MachineInstr &MI) const;
bool isMemOp(const MachineInstr &MI) const;
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index d11f5a9080a0..9793c7bc3532 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -248,7 +248,7 @@ public:
addExpr(Inst, getImm());
}
};
-} // end anonymous namespace
+} // end namespace
#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
index 215d061f11f2..beb757c78596 100644
--- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
+++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
@@ -39,7 +39,7 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
};
-} // end anonymous namespace
+} // end namespace
static MCDisassembler *createLoongArchDisassembler(const Target &T,
const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index caa7bd31e28b..e6c9c24dd1b2 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -33,6 +33,6 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
const AsmPrinter &AP);
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 014b666de711..72d8e006a0bb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -52,5 +52,5 @@ private:
const DebugLoc &DL, Register DestReg, Register SrcReg,
int64_t Val, MachineInstr::MIFlag Flag) const;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 7ad329a64424..8c9357d75979 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -55,6 +55,6 @@ public:
#include "LoongArchGenDAGISel.inc"
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 279550482675..141f1fd3a55d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -45,7 +45,7 @@ enum NodeType : unsigned {
BSTRPICK,
};
-} // namespace LoongArchISD
+} // end namespace LoongArchISD
class LoongArchTargetLowering : public TargetLowering {
const LoongArchSubtarget &Subtarget;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
index 02c9156e2b87..cca130c3bc3a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
@@ -45,6 +45,6 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo {
Register getFrameRegister(const MachineFunction &MF) const override;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 95c2c676cc3c..fbe7a176b371 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -84,6 +84,6 @@ public:
unsigned getGRLen() const { return GRLen; }
LoongArchABI::ABI getTargetABI() const { return TargetABI; }
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 2d08d5c674bc..7ba5848e0997 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -103,7 +103,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
};
-} // namespace
+} // end namespace
TargetPassConfig *
LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index 77bbfb095747..a5f0b816c972 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -58,6 +58,6 @@ public:
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
index f0c985883125..de2ba2833414 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
@@ -35,6 +35,6 @@ ABI getTargetABI(StringRef ABIName) {
// FIXME: other register?
MCRegister getBPReg() { return LoongArch::R31; }
-} // namespace LoongArchABI
+} // end namespace LoongArchABI
-} // namespace llvm
+} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index e26f22de0cbc..fee247a0c02c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -37,8 +37,8 @@ ABI getTargetABI(StringRef ABIName);
// Returns the register used to hold the stack pointer after realignment.
MCRegister getBPReg();
-} // namespace LoongArchABI
+} // end namespace LoongArchABI
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 95e1314f363a..1850b0d8a756 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -33,7 +33,7 @@ protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
-} // namespace
+} // end namespace
LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
index 727fc6a3e1f3..0cbb3d73cd03 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
@@ -44,6 +44,6 @@ private:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h
index 1cf8a2fdf8aa..ed1abbf46153 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h
@@ -25,6 +25,6 @@ public:
explicit LoongArchMCAsmInfo(const Triple &TargetTriple);
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 9c6a4f39b9ea..01a370a90403 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -69,7 +69,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
};
-} // end anonymous namespace
+} // end namespace
unsigned
LoongArchMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index e50761ab1e27..8d71235f6a81 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -95,7 +95,7 @@ public:
}
};
-} // end anonymous namespace
+} // end namespace
static MCInstrAnalysis *createLoongArchInstrAnalysis(const MCInstrInfo *Info) {
return new LoongArchMCInstrAnalysis(Info);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
index a606ccdbc47c..ab35a0096c8a 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
@@ -38,7 +38,7 @@ MCAsmBackend *createLoongArchAsmBackend(const Target &T,
std::unique_ptr<MCObjectTargetWriter>
createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
-} // namespace llvm
+} // end namespace llvm
// Defines symbolic names for LoongArch registers.
#define GET_REGINFO_ENUM
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
index 945aa91e40c0..be1b425894de 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
@@ -24,7 +24,7 @@ using InstSeq = SmallVector<Inst, 4>;
// Helper to generate an instruction sequence that will materialise the given
// immediate value into a register.
InstSeq generateInstSeq(int64_t Val);
-} // namespace LoongArchMatInt
-} // namespace llvm
+} // end namespace LoongArchMatInt
+} // end namespace llvm
#endif
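
The helper declared above is the sort of interface instruction selection and cost modeling consume. A minimal usage sketch, relying only on what this header declares (InstSeq and generateInstSeq); the wrapping function is hypothetical:

#include "MCTargetDesc/LoongArchMatInt.h"

// Hypothetical cost query: how many instructions are needed to
// materialise Imm into a register?
static unsigned immediateCost(int64_t Imm) {
  return llvm::LoongArchMatInt::generateInstSeq(Imm).size();
}
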
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
index 6fc13d52c065..b24cf879512c 100644
--- a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
+++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
@@ -16,6 +16,6 @@ class Target;
Target &getTheLoongArch32Target();
Target &getTheLoongArch64Target();
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index cb6d53ec0a12..5dc2bf07ddd5 100644
--- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -31,8 +31,8 @@ public:
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
/*EnableOptSize*/ false, /*EnableMinSize*/ false) {}
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
};
bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
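
A recurring cleanup in this changeset (here and in the RISCV, SystemZ, X86, Attributor, and SCCP hunks below) is dropping redundant virtual-dispatch keywords: `override` already implies the member is virtual, and where `final` is present LLVM style drops the accompanying `override`. A minimal self-contained illustration:

struct Base {
  virtual int f() const { return 0; }
  virtual int g() const { return 0; }
  virtual ~Base() = default;
};
struct Derived : Base {
  int f() const override { return 1; } // `virtual` would add nothing here
  int g() const final { return 2; }    // `final` also implies it is virtual
};
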
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index b700a9ede39b..a19253da440e 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -81,6 +82,20 @@ bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) {
return true;
}
+ // Convert (zext (abs(i32 X, i1 1))) -> (sext (abs(i32 X, i1 1))). If abs of
+ // INT_MIN is poison, the sign bit is zero.
+ using namespace PatternMatch;
+ if (match(Src, m_Intrinsic<Intrinsic::abs>(m_Value(), m_One()))) {
+ auto *SExt = new SExtInst(Src, ZExt->getType(), "", ZExt);
+ SExt->takeName(ZExt);
+ SExt->setDebugLoc(ZExt->getDebugLoc());
+
+ ZExt->replaceAllUsesWith(SExt);
+ ZExt->eraseFromParent();
+ ++NumZExtToSExt;
+ return true;
+ }
+
return false;
}
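
The fold above relies on abs with the int-min-is-poison flag never producing a value with the sign bit set, so sign- and zero-extension agree. A standalone sanity check of that arithmetic premise (ordinary C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {-7, -1, 0, 1, 42, INT32_MAX}) {
    int32_t A = X < 0 ? -X : X; // abs; INT_MIN excluded, as it is poison above
    assert((uint64_t)(uint32_t)A == (uint64_t)(int64_t)A); // zext == sext
  }
  return 0;
}
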
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1702546b58a6..baa19e81e436 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1313,6 +1313,25 @@ bool RISCVTargetLowering::shouldSinkOperands(
return true;
}
+bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ unsigned Opc = VecOp.getOpcode();
+
+ // Assume target opcodes can't be scalarized.
+ // TODO - do we have any exceptions?
+ if (Opc >= ISD::BUILTIN_OP_END)
+ return false;
+
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
bool RISCVTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// In order to maximise the opportunity for common subexpression elimination,
@@ -1387,18 +1406,28 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
}
- // Convert X > -1 to X >= 0.
- if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
- RHS = DAG.getConstant(0, DL, RHS.getValueType());
- CC = ISD::SETGE;
- return;
- }
- // Convert X < 1 to 0 >= X.
- if (CC == ISD::SETLT && isOneConstant(RHS)) {
- RHS = LHS;
- LHS = DAG.getConstant(0, DL, RHS.getValueType());
- CC = ISD::SETGE;
- return;
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t C = RHSC->getSExtValue();
+ switch (CC) {
+ default: break;
+ case ISD::SETGT:
+ // Convert X > -1 to X >= 0.
+ if (C == -1) {
+ RHS = DAG.getConstant(0, DL, RHS.getValueType());
+ CC = ISD::SETGE;
+ return;
+ }
+ break;
+ case ISD::SETLT:
+ // Convert X < 1 to 0 >= X.
+ if (C == 1) {
+ RHS = LHS;
+ LHS = DAG.getConstant(0, DL, RHS.getValueType());
+ CC = ISD::SETGE;
+ return;
+ }
+ break;
+ }
}
switch (CC) {
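
The rewritten translateSetCCForBranch keeps the same two canonicalizations, now dispatched through a single ConstantSDNode switch. A standalone check of the underlying integer identities (ordinary C++, not LLVM code):

#include <cassert>

int main() {
  for (int X : {-5, -1, 0, 1, 5}) {
    assert((X > -1) == (X >= 0)); // SETGT -1 -> SETGE 0
    assert((X < 1) == (0 >= X));  // SETLT 1  -> SETGE with swapped operands
  }
  return 0;
}
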
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5e15176de59c..6ecf8b8324d4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -376,6 +376,7 @@ public:
SelectionDAG &DAG) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
+ bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 4aa9ded5b3a2..beb49f5f6249 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -134,14 +134,13 @@ public:
getSerializableDirectMachineOperandTargetFlags() const override;
// Return true if the function can safely be outlined from.
- virtual bool
- isFunctionSafeToOutlineFrom(MachineFunction &MF,
- bool OutlineFromLinkOnceODRs) const override;
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+ bool OutlineFromLinkOnceODRs) const override;
// Return true if MBB is safe to outline from, and return any target-specific
// information in Flags.
- virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
- unsigned &Flags) const override;
+ bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
@@ -150,17 +149,15 @@ public:
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
// Return if/how a given MachineInstr should be outlined.
- virtual outliner::InstrType
- getOutliningType(MachineBasicBlock::iterator &MBBI,
- unsigned Flags) const override;
+ outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MBBI,
+ unsigned Flags) const override;
// Insert a custom frame for outlined functions.
- virtual void
- buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
- const outliner::OutlinedFunction &OF) const override;
+ void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const override;
// Insert a call to an outlined function into a given basic block.
- virtual MachineBasicBlock::iterator
+ MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
outliner::Candidate &C) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index d204c85d6179..cd1da4360002 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -696,52 +696,36 @@ def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
//===----------------------------------------------------------------------===//
let EmitPriority = 0 in {
-let Predicates = [HasStdExtC, HasStdExtD] in
-def : InstAlias<"c.fld $rd, (${rs1})", (C_FLD FPR64C:$rd, GPRC:$rs1, 0)>;
-
+let Predicates = [HasStdExtC] in {
def : InstAlias<"c.lw $rd, (${rs1})", (C_LW GPRC:$rd, GPRC:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def : InstAlias<"c.flw $rd, (${rs1})", (C_FLW FPR32C:$rd, GPRC:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, IsRV64] in
-def : InstAlias<"c.ld $rd, (${rs1})", (C_LD GPRC:$rd, GPRC:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, HasStdExtD] in
-def : InstAlias<"c.fsd $rs2, (${rs1})", (C_FSD FPR64C:$rs2, GPRC:$rs1, 0)>;
-
def : InstAlias<"c.sw $rs2, (${rs1})", (C_SW GPRC:$rs2, GPRC:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def : InstAlias<"c.fsw $rs2, (${rs1})", (C_FSW FPR32C:$rs2, GPRC:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, IsRV64] in
-def : InstAlias<"c.sd $rs2, (${rs1})", (C_SD GPRC:$rs2, GPRC:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, HasStdExtD] in
-def : InstAlias<"c.fldsp $rd, (${rs1})", (C_FLDSP FPR64C:$rd, SP:$rs1, 0)>;
-
def : InstAlias<"c.lwsp $rd, (${rs1})", (C_LWSP GPRC:$rd, SP:$rs1, 0)>;
+def : InstAlias<"c.swsp $rs2, (${rs1})", (C_SWSP GPRC:$rs2, SP:$rs1, 0)>;
+}
-let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def : InstAlias<"c.flwsp $rd, (${rs1})", (C_FLWSP FPR32C:$rd, SP:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, IsRV64] in
+let Predicates = [HasStdExtC, IsRV64] in {
+def : InstAlias<"c.ld $rd, (${rs1})", (C_LD GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.sd $rs2, (${rs1})", (C_SD GPRC:$rs2, GPRC:$rs1, 0)>;
def : InstAlias<"c.ldsp $rd, (${rs1})", (C_LDSP GPRC:$rd, SP:$rs1, 0)>;
+def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>;
+}
-let Predicates = [HasStdExtC, HasStdExtD] in
-def : InstAlias<"c.fsdsp $rs2, (${rs1})", (C_FSDSP FPR64C:$rs2, SP:$rs1, 0)>;
-
-def : InstAlias<"c.swsp $rs2, (${rs1})", (C_SWSP GPRC:$rs2, SP:$rs1, 0)>;
-
-let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in {
+def : InstAlias<"c.flw $rd, (${rs1})", (C_FLW FPR32C:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.fsw $rs2, (${rs1})", (C_FSW FPR32C:$rs2, GPRC:$rs1, 0)>;
+def : InstAlias<"c.flwsp $rd, (${rs1})", (C_FLWSP FPR32C:$rd, SP:$rs1, 0)>;
def : InstAlias<"c.fswsp $rs2, (${rs1})", (C_FSWSP FPR32C:$rs2, SP:$rs1, 0)>;
+}
-let Predicates = [HasStdExtC, IsRV64] in
-def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>;
+let Predicates = [HasStdExtC, HasStdExtD] in {
+def : InstAlias<"c.fld $rd, (${rs1})", (C_FLD FPR64C:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.fsd $rs2, (${rs1})", (C_FSD FPR64C:$rs2, GPRC:$rs1, 0)>;
+def : InstAlias<"c.fldsp $rd, (${rs1})", (C_FLDSP FPR64C:$rd, SP:$rs1, 0)>;
+def : InstAlias<"c.fsdsp $rs2, (${rs1})", (C_FSDSP FPR64C:$rs2, SP:$rs1, 0)>;
}
+} // EmitPriority = 0
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 087646fb5ed9..4b2a403c5c5b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -44,8 +44,7 @@ public:
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
- virtual bool isNoopAddrSpaceCast(unsigned SrcAS,
- unsigned DstAS) const override;
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override;
yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
yaml::MachineFunctionInfo *
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 93ffa9847f06..db0936f3f56b 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -81,25 +81,22 @@ public:
/// Particular to z/OS when in 64 bit mode
class SystemZXPLINK64Registers : public SystemZCallingConventionRegisters {
public:
- int getReturnFunctionAddressRegister() override final {
- return SystemZ::R7D;
- };
+ int getReturnFunctionAddressRegister() final { return SystemZ::R7D; };
- int getStackPointerRegister() override final { return SystemZ::R4D; };
+ int getStackPointerRegister() final { return SystemZ::R4D; };
- int getFramePointerRegister() override final { return SystemZ::R8D; };
+ int getFramePointerRegister() final { return SystemZ::R8D; };
int getAddressOfCalleeRegister() { return SystemZ::R6D; };
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF) const override final;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const final;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
- CallingConv::ID CC) const override final;
+ CallingConv::ID CC) const final;
- int getCallFrameSize() override final { return 128; }
+ int getCallFrameSize() final { return 128; }
- int getStackPointerBias() override final { return 2048; }
+ int getStackPointerBias() final { return 2048; }
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZXPLINK64Registers() = default;
@@ -109,23 +106,20 @@ public:
/// Particular when on zLinux in 64 bit mode
class SystemZELFRegisters : public SystemZCallingConventionRegisters {
public:
- int getReturnFunctionAddressRegister() override final {
- return SystemZ::R14D;
- };
+ int getReturnFunctionAddressRegister() final { return SystemZ::R14D; };
- int getStackPointerRegister() override final { return SystemZ::R15D; };
+ int getStackPointerRegister() final { return SystemZ::R15D; };
- int getFramePointerRegister() override final { return SystemZ::R11D; };
+ int getFramePointerRegister() final { return SystemZ::R11D; };
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF) const override final;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const final;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
- CallingConv::ID CC) const override final;
+ CallingConv::ID CC) const final;
- int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
+ int getCallFrameSize() final { return SystemZMC::ELFCallFrameSize; }
- int getStackPointerBias() override final { return 0; }
+ int getStackPointerBias() final { return 0; }
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZELFRegisters() = default;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 2636acaf1604..ab6d6b4f7ef1 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -577,8 +577,9 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
CallParams.removeOperand(0);
// For funcrefs, call_indirect is done through __funcref_call_table and the
- // funcref is always installed in slot 0 of the table, therefore instead of having
- // the function pointer added at the end of the params list, a zero (the index in
+ // funcref is always installed in slot 0 of the table, therefore instead of
+ // having the function pointer added at the end of the params list, a zero
+ // (the index in
// __funcref_call_table is added).
if (IsFuncrefCall) {
Register RegZero =
@@ -1156,7 +1157,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
// doesn't add MO_GOT which is not needed for direct calls.
- GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
getPointerTy(DAG.getDataLayout()),
GA->getOffset());
@@ -1719,20 +1720,12 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
- // Currently Emscripten does not support dynamic linking with threads.
- // Therefore, if we have thread-local storage, only the local-exec model
- // is possible.
- // TODO: remove this and implement proper TLS models once Emscripten
- // supports dynamic linking with threads.
- if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
- !Subtarget->getTargetTriple().isOSEmscripten()) {
- report_fatal_error("only -ftls-model=local-exec is supported for now on "
- "non-Emscripten OSes: variable " +
- GV->getName(),
- false);
- }
-
- auto model = GV->getThreadLocalMode();
+ // Currently only Emscripten supports dynamic linking with threads. Therefore,
+ // on other targets, if we have thread-local storage, only the local-exec
+ // model is possible.
+ auto model = Subtarget->getTargetTriple().isOSEmscripten()
+ ? GV->getThreadLocalMode()
+ : GlobalValue::LocalExecTLSModel;
// Unsupported TLS modes
assert(model != GlobalValue::NotThreadLocal);
@@ -1791,8 +1784,7 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
if (GV->getValueType()->isFunctionTy()) {
BaseName = MF.createExternalSymbolName("__table_base");
OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
- }
- else {
+ } else {
BaseName = MF.createExternalSymbolName("__memory_base");
OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5a4533c4bac4..b080ab7e138c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1041,6 +1041,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
@@ -1255,6 +1256,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
@@ -19302,6 +19304,44 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
return false;
}
+static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+
+ MVT VT = V1.getSimpleValueType().getScalarType();
+ if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
+ return false;
+
+ // i8 is better widened to i16, because there is PBLENDW for vXi16 when the
+ // vector bit size is 128 or 256.
+ if (VT == MVT::i8 && V1.getSimpleValueType().getSizeInBits() < 512)
+ return false;
+
+ auto HasMaskOperation = [&](SDValue V) {
+ // TODO: Currently we only check a limited set of opcodes. This could
+ // probably be extended to all binary operations by checking TLI.isBinOp().
+ switch (V->getOpcode()) {
+ default:
+ return false;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::XOR:
+ break;
+ }
+ if (!V->hasOneUse())
+ return false;
+
+ return true;
+ };
+
+ if (HasMaskOperation(V1) || HasMaskOperation(V2))
+ return true;
+
+ return false;
+}
+
// Forward declaration.
static SDValue canonicalizeShuffleMaskWithHorizOp(
MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
@@ -19377,6 +19417,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
+ !canCombineAsMaskOperation(V1, V2, Subtarget) &&
canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
// Shuffle mask widening should not interfere with a broadcast opportunity
// by obfuscating the operands with bitcasts.
@@ -32379,6 +32420,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
+ case ISD::SMULO:
+ case ISD::UMULO: {
+ EVT VT = N->getValueType(0);
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ VT == MVT::v2i32 && "Unexpected VT!");
+ bool IsSigned = N->getOpcode() == ISD::SMULO;
+ unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Op0 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::MUL, dl, MVT::v2i64, Op0, Op1);
+ // Extract the high 32 bits from each result using PSHUFD.
+ // TODO: Could use SRL+TRUNCATE but that doesn't become a PSHUFD.
+ SDValue Hi = DAG.getBitcast(MVT::v4i32, Res);
+ Hi = DAG.getVectorShuffle(MVT::v4i32, dl, Hi, Hi, {1, 3, -1, -1});
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Hi,
+ DAG.getIntPtrConstant(0, dl));
+
+ // Truncate the low bits of the result. This will become PSHUFD.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+
+ SDValue HiCmp;
+ if (IsSigned) {
+ // SMULO overflows if the high bits don't match the sign of the low.
+ HiCmp = DAG.getNode(ISD::SRA, dl, VT, Res, DAG.getConstant(31, dl, VT));
+ } else {
+ // UMULO overflows if the high bits are non-zero.
+ HiCmp = DAG.getConstant(0, dl, VT);
+ }
+ SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE);
+
+ // Widen the result by padding with undef.
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
+ DAG.getUNDEF(VT));
+ Results.push_back(Res);
+ Results.push_back(Ovf);
+ return;
+ }
case X86ISD::VPMADDWD: {
// Legalize types for X86ISD::VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
@@ -37522,8 +37600,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
break;
}
if (IsBlend) {
- if (DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
- DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
+ if (DAG.MaskedVectorIsZero(V1, DemandedZeroV1) &&
+ DAG.MaskedVectorIsZero(V2, DemandedZeroV2)) {
Shuffle = ISD::OR;
SrcVT = DstVT = MaskVT.changeTypeToInteger();
return true;
@@ -41191,7 +41269,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Src = Op.getOperand(0);
APInt DemandedUpperElts = DemandedElts;
DemandedUpperElts.clearLowBits(1);
- if (TLO.DAG.computeKnownBits(Src, DemandedUpperElts, Depth + 1).isZero())
+ if (TLO.DAG.MaskedVectorIsZero(Src, DemandedUpperElts, Depth + 1))
return TLO.CombineTo(Op, Src);
break;
}
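
The v2i32 SMULO/UMULO legalization above widens to a v2i64 multiply and derives the overflow bit from the high half of each product. A standalone restatement of that overflow test in plain C++ (two's-complement behavior assumed for the signed case):

#include <cassert>
#include <cstdint>

// UMULO: a 32x32 unsigned multiply overflows iff the high half is non-zero.
static bool umulo32(uint32_t A, uint32_t B) {
  uint64_t P = (uint64_t)A * B;
  return (uint32_t)(P >> 32) != 0;
}

// SMULO: a signed multiply overflows iff the high half differs from the
// sign-extension of the low half (the SRA by 31 in the code above).
static bool smulo32(int32_t A, int32_t B) {
  int64_t P = (int64_t)A * B;
  int32_t Lo = (int32_t)P;            // truncate, like the TRUNCATE node
  return (int32_t)(P >> 32) != (Lo >> 31);
}

int main() {
  assert(!umulo32(3, 5) && umulo32(UINT32_MAX, 2));
  assert(!smulo32(-3, 5) && smulo32(INT32_MIN, -1));
  return 0;
}
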
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index af110884049b..85e5d0ba4c34 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1409,7 +1409,7 @@ namespace llvm {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
- virtual bool needsFixedCatchObjects() const override;
+ bool needsFixedCatchObjects() const override;
/// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 98da00c39bdb..81729e3618d8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -544,7 +544,7 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
- virtual outliner::OutlinedFunction getOutliningCandidateInfo(
+ outliner::OutlinedFunction getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 1fd8b88dd776..35adaa3bde65 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -427,27 +428,73 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
return true;
}
+/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
+/// pessimistic codegen that has to account for setting errno and can enable
+/// vectorization.
+static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+                     TargetLibraryInfo &TLI) {
+ // Match a call to sqrt mathlib function.
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ return false;
+
+ Module *M = Call->getModule();
+ LibFunc Func;
+ if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
+ return false;
+
+ if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
+ return false;
+
+ // If (1) this is a sqrt libcall, (2) we can assume that NaN is not created
+ // (the call has nnan, so the operand must not be less than -0.0), and
+ // (3) we would not end up lowering to a libcall anyway (which could change
+ // the value of errno), then:
+ // (1) errno won't be set, and
+ // (2) it is safe to convert this to an intrinsic call.
+ // TODO: Check if the arg is known non-negative.
+ Type *Ty = Call->getType();
+ if (TTI.haveFastSqrt(Ty) && Call->hasNoNaNs()) {
+ IRBuilder<> Builder(&I);
+ IRBuilderBase::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(Call->getFastMathFlags());
+
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
+ Value *NewSqrt = Builder.CreateCall(Sqrt, Call->getArgOperand(0), "sqrt");
+ I.replaceAllUsesWith(NewSqrt);
+
+ // Explicitly erase the old call because a call with side effects is not
+ // trivially dead.
+ I.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
- TargetTransformInfo &TTI) {
+ TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
continue;
- // Do not delete instructions under here and invalidate the iterator.
+
// Walk the block backwards for efficiency. We're matching a chain of
// use->defs, so we're more likely to succeed by starting from the bottom.
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : llvm::reverse(BB)) {
+ for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
MadeChange |= tryToFPToSat(I, TTI);
+ MadeChange |= foldSqrt(I, TTI, TLI);
}
}
@@ -467,7 +514,7 @@ static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT, TTI);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI);
return MadeChange;
}
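
The foldSqrt transform matters mostly for loops: a libm sqrt call can set errno, which makes it a side-effecting call and blocks vectorization, while the llvm.sqrt intrinsic cannot. A hypothetical source-level example of code that benefits, assuming the front end attaches nnan (e.g. via -ffast-math):

#include <cmath>

void roots(const float *In, float *Out, int N) {
  for (int I = 0; I < N; ++I)
    Out[I] = std::sqrt(In[I]); // rewritten to llvm.sqrt; loop can vectorize
}
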
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 62cfc3294968..8c77b6937737 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -249,7 +249,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
{LLVMContext::MD_range, LLVMContext::MD_nonnull,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_align, LLVMContext::MD_noundef});
+ LLVMContext::MD_align, LLVMContext::MD_noundef,
+ LLVMContext::MD_nontemporal});
}
Args.push_back(LI);
ArgAttrVec.push_back(AttributeSet());
@@ -631,8 +632,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
// Sort parts by offset.
append_range(ArgPartsVec, ArgParts);
- sort(ArgPartsVec,
- [](const auto &A, const auto &B) { return A.first < B.first; });
+ sort(ArgPartsVec, llvm::less_first());
// Make sure the parts are non-overlapping.
int64_t Offset = ArgPartsVec[0].first;
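
llvm::less_first (from llvm/ADT/STLExtras.h) compares pair-like values by their first element only, replacing the hand-written lambda here and in the GlobalOpt hunk below. A standalone illustration:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, char>> V = {{3, 'c'}, {1, 'a'}, {2, 'b'}};
  llvm::sort(V, llvm::less_first()); // orders by .first only
  assert(V.front().first == 1 && V.back().first == 3);
  return 0;
}
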
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 660ff3ee9563..83252fec3ea8 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -3328,7 +3328,7 @@ struct AANoAliasReturned final : AANoAliasImpl {
}
/// See AbstractAttribute::updateImpl(...).
- virtual ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus updateImpl(Attributor &A) override {
auto CheckReturnValue = [&](Value &RV) -> bool {
if (Constant *C = dyn_cast<Constant>(&RV))
@@ -3427,7 +3427,7 @@ struct AAIsDeadValueImpl : public AAIsDead {
}
/// See AbstractAttribute::getAsStr().
- virtual const std::string getAsStr() const override {
+ const std::string getAsStr() const override {
return isAssumedDead() ? "assumed-dead" : "assumed-live";
}
@@ -4500,9 +4500,8 @@ struct AAAlignImpl : AAAlign {
// to avoid making the alignment explicit if it did not improve.
/// See AbstractAttribute::getDeducedAttributes
- virtual void
- getDeducedAttributes(LLVMContext &Ctx,
- SmallVectorImpl<Attribute> &Attrs) const override {
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
if (getAssumedAlign() > 1)
Attrs.emplace_back(
Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
@@ -4709,7 +4708,7 @@ struct AANoReturnImpl : public AANoReturn {
}
/// See AbstractAttribute::updateImpl(Attributor &A).
- virtual ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus updateImpl(Attributor &A) override {
auto CheckForNoReturn = [](Instruction &) { return false; };
bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(CheckForNoReturn, *this,
@@ -4972,9 +4971,8 @@ struct AANoCaptureImpl : public AANoCapture {
ChangeStatus updateImpl(Attributor &A) override;
/// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...).
- virtual void
- getDeducedAttributes(LLVMContext &Ctx,
- SmallVectorImpl<Attribute> &Attrs) const override {
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
if (!isAssumedNoCaptureMaybeReturned())
return;
@@ -6848,7 +6846,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
: AAPrivatizablePtrImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
- virtual void initialize(Attributor &A) override {
+ void initialize(Attributor &A) override {
// TODO: We can privatize more than arguments.
indicatePessimisticFixpoint();
}
@@ -7222,7 +7220,7 @@ struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
: AAMemoryBehaviorImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(Attributor &A).
- virtual ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override;
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
@@ -7934,7 +7932,7 @@ struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
: AAMemoryLocationImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(Attributor &A).
- virtual ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus updateImpl(Attributor &A) override {
const auto &MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
@@ -9332,13 +9330,13 @@ struct AANoUndefCallSiteReturned final
struct AACallEdgesImpl : public AACallEdges {
AACallEdgesImpl(const IRPosition &IRP, Attributor &A) : AACallEdges(IRP, A) {}
- virtual const SetVector<Function *> &getOptimisticEdges() const override {
+ const SetVector<Function *> &getOptimisticEdges() const override {
return CalledFunctions;
}
- virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+ bool hasUnknownCallee() const override { return HasUnknownCallee; }
- virtual bool hasNonAsmUnknownCallee() const override {
+ bool hasNonAsmUnknownCallee() const override {
return HasUnknownCalleeNonAsm;
}
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 49077f92884f..50710eaa1b57 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -931,10 +931,9 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
F->getReturnType()->isVoidTy()) {
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
- ++A) {
- if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(Attribute::NoCapture);
+ for (Argument &A : F->args()) {
+ if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
+ A.addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(F);
}
@@ -942,44 +941,43 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
continue;
}
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
- ++A) {
- if (!A->getType()->isPointerTy())
+ for (Argument &A : F->args()) {
+ if (!A.getType()->isPointerTy())
continue;
bool HasNonLocalUses = false;
- if (!A->hasNoCaptureAttr()) {
+ if (!A.hasNoCaptureAttr()) {
ArgumentUsesTracker Tracker(SCCNodes);
- PointerMayBeCaptured(&*A, &Tracker);
+ PointerMayBeCaptured(&A, &Tracker);
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(Attribute::NoCapture);
+ A.addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(F);
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
// its particulars for Argument-SCC analysis later.
- ArgumentGraphNode *Node = AG[&*A];
+ ArgumentGraphNode *Node = AG[&A];
for (Argument *Use : Tracker.Uses) {
Node->Uses.push_back(AG[Use]);
- if (Use != &*A)
+ if (Use != &A)
HasNonLocalUses = true;
}
}
}
// Otherwise, it's captured. Don't bother doing SCC analysis on it.
}
- if (!HasNonLocalUses && !A->onlyReadsMemory()) {
+ if (!HasNonLocalUses && !A.onlyReadsMemory()) {
// Can we determine that it's readonly/readnone/writeonly without doing
// an SCC? Note that we don't allow any calls at all here, or else our
// result will be dependent on the iteration order through the
// functions in the SCC.
SmallPtrSet<Argument *, 8> Self;
- Self.insert(&*A);
- Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
+ Self.insert(&A);
+ Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self);
if (R != Attribute::None)
- if (addAccessAttr(A, R))
+ if (addAccessAttr(&A, R))
Changed.insert(F);
}
}
@@ -1017,12 +1015,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
}
bool SCCCaptured = false;
- for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
- I != E && !SCCCaptured; ++I) {
- ArgumentGraphNode *Node = *I;
- if (Node->Uses.empty()) {
- if (!Node->Definition->hasNoCaptureAttr())
- SCCCaptured = true;
+ for (ArgumentGraphNode *Node : ArgumentSCC) {
+ if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) {
+ SCCCaptured = true;
+ break;
}
}
if (SCCCaptured)
@@ -1035,9 +1031,7 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
ArgumentSCCNodes.insert(I->Definition);
}
- for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
- I != E && !SCCCaptured; ++I) {
- ArgumentGraphNode *N = *I;
+ for (ArgumentGraphNode *N : ArgumentSCC) {
for (ArgumentGraphNode *Use : N->Uses) {
Argument *A = Use->Definition;
if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
@@ -1045,12 +1039,14 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
SCCCaptured = true;
break;
}
+ if (SCCCaptured)
+ break;
}
if (SCCCaptured)
continue;
- for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
- Argument *A = ArgumentSCC[i]->Definition;
+ for (ArgumentGraphNode *N : ArgumentSCC) {
+ Argument *A = N->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(A->getParent());
@@ -1078,16 +1074,17 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
};
Attribute::AttrKind AccessAttr = Attribute::ReadNone;
- for (unsigned i = 0, e = ArgumentSCC.size();
- i != e && AccessAttr != Attribute::None; ++i) {
- Argument *A = ArgumentSCC[i]->Definition;
+ for (ArgumentGraphNode *N : ArgumentSCC) {
+ Argument *A = N->Definition;
Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
AccessAttr = meetAccessAttr(AccessAttr, K);
+ if (AccessAttr == Attribute::None)
+ break;
}
if (AccessAttr != Attribute::None) {
- for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
- Argument *A = ArgumentSCC[i]->Definition;
+ for (ArgumentGraphNode *N : ArgumentSCC) {
+ Argument *A = N->Definition;
if (addAccessAttr(A, AccessAttr))
Changed.insert(A->getParent());
}
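
For readers following the nocapture reasoning in the rewritten loops, a source-level intuition with two hypothetical functions: F neither stores nor returns its pointer argument, so the pass can mark it nocapture; G escapes the pointer into a global, so it cannot.

int *Global;
int F(const int *P) { return *P; } // eligible: the pointer does not escape
void G(int *P) { Global = P; }     // not eligible: P is captured
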
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index ec26db8bfc0b..6df0409256bb 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -470,8 +470,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Sort by offset.
SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
append_range(TypesVector, Types);
- sort(TypesVector,
- [](const auto &A, const auto &B) { return A.first < B.first; });
+ sort(TypesVector, llvm::less_first());
// Check that the types are non-overlapping.
uint64_t Offset = 0;
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 6bf25df101fa..e3e4908f085b 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1778,35 +1778,48 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
Old->replaceUsesWithIf(New, isDirectCall);
}
+static void dropTypeTests(Module &M, Function &TypeTestFunc) {
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc.uses())) {
+ auto *CI = cast<CallInst>(U.getUser());
+ // Find and erase llvm.assume intrinsics for this llvm.type.test call.
+ for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
+ if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
+ Assume->eraseFromParent();
+ // If the assume was merged with another assume, we might have a use on a
+ // phi (which will feed the assume). Simply replace the use on the phi
+ // with "true" and leave the merged assume.
+ if (!CI->use_empty()) {
+ assert(
+ all_of(CI->users(), [](User *U) -> bool { return isa<PHINode>(U); }));
+ CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
+ }
+ CI->eraseFromParent();
+ }
+}
+
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
- if (DropTypeTests && TypeTestFunc) {
- for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
- auto *CI = cast<CallInst>(U.getUser());
- // Find and erase llvm.assume intrinsics for this llvm.type.test call.
- for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
- if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
- Assume->eraseFromParent();
- // If the assume was merged with another assume, we might have a use on a
- // phi (which will feed the assume). Simply replace the use on the phi
- // with "true" and leave the merged assume.
- if (!CI->use_empty()) {
- assert(all_of(CI->users(),
- [](User *U) -> bool { return isa<PHINode>(U); }));
- CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
- }
- CI->eraseFromParent();
+ if (DropTypeTests) {
+ if (TypeTestFunc)
+ dropTypeTests(M, *TypeTestFunc);
+ // Normally we'd have already removed all @llvm.public.type.test calls,
+ // except for in the case where we originally were performing ThinLTO but
+ // decided not to in the backend.
+ Function *PublicTypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
+ if (PublicTypeTestFunc)
+ dropTypeTests(M, *PublicTypeTestFunc);
+ if (TypeTestFunc || PublicTypeTestFunc) {
+ // We have deleted the type intrinsics, so we no longer have enough
+ // information to reason about the liveness of virtual function pointers
+ // in GlobalDCE.
+ for (GlobalVariable &GV : M.globals())
+ GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+ return true;
}
-
- // We have deleted the type intrinsics, so we no longer have enough
- // information to reason about the liveness of virtual function pointers
- // in GlobalDCE.
- for (GlobalVariable &GV : M.globals())
- GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
-
- return true;
+ return false;
}
// If only some of the modules were split, we cannot correctly perform
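
Both dropTypeTests loops erase the call they are visiting, which is safe only because llvm::make_early_inc_range advances the underlying iterator before the loop body runs. A container-level illustration of the idiom:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <set>

int main() {
  std::set<int> S = {1, 2, 3, 4};
  for (int X : llvm::make_early_inc_range(S))
    if (X % 2 == 0)
      S.erase(X); // erasing the current element cannot invalidate the loop
  assert(S.count(1) && !S.count(2));
  return 0;
}
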
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 0b42fc151991..ef2384faa273 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -499,18 +499,6 @@ struct OMPInformationCache : public InformationCache {
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
- // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
- // functions, except if `optnone` is present.
- if (isOpenMPDevice(M)) {
- for (Function &F : M) {
- for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
- if (F.hasFnAttribute(Attribute::NoInline) &&
- F.getName().startswith(Prefix) &&
- !F.hasFnAttribute(Attribute::OptimizeNone))
- F.removeFnAttr(Attribute::NoInline);
- }
- }
-
// TODO: We should attach the attributes defined in OMPKinds.def.
}
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index 26fb7d676429..0453af184a72 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -148,7 +148,7 @@ struct FunctionSpecializationLegacyPass : public ModulePass {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
- virtual bool runOnModule(Module &M) override {
+ bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index a360a768a2bc..ef7af551a328 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -132,6 +132,14 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {
}
}
+ if (Function *PublicTypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::public_type_test))) {
+ for (const Use &U : PublicTypeTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+ ExternalizeTypeId(CI, 1);
+ }
+ }
+
if (Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
for (const Use &U : TypeCheckedLoadFunc->uses()) {
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index ad00c116ce0a..18efe99f7cb4 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -773,15 +773,14 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::none();
}
+namespace llvm {
// Enable whole program visibility if enabled by client (e.g. linker) or
// internal option, and not force disabled.
-static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
+bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
!DisableWholeProgramVisibility;
}
-namespace llvm {
-
/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definitions to linkage unit visibility in
/// Module IR (for regular or hybrid LTO).
@@ -790,7 +789,7 @@ void updateVCallVisibilityInModule(
const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
return;
- for (GlobalVariable &GV : M.globals())
+ for (GlobalVariable &GV : M.globals()) {
// Add linkage unit visibility to any variable with type metadata, which are
// the vtable definitions. We won't have an existing vcall_visibility
// metadata on vtable definitions with public visibility.
@@ -800,6 +799,34 @@ void updateVCallVisibilityInModule(
// linker, as we have no information on their eventual use.
!DynamicExportSymbols.count(GV.getGUID()))
GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
+ }
+}
+
+void updatePublicTypeTestCalls(Module &M,
+ bool WholeProgramVisibilityEnabledInLTO) {
+ Function *PublicTypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
+ if (!PublicTypeTestFunc)
+ return;
+ if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
+ Function *TypeTestFunc =
+ Intrinsic::getDeclaration(&M, Intrinsic::type_test);
+ for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
+ auto *CI = cast<CallInst>(U.getUser());
+ auto *NewCI = CallInst::Create(
+ TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, None, "",
+ CI);
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ }
+ } else {
+ auto *True = ConstantInt::getTrue(M.getContext());
+ for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
+ auto *CI = cast<CallInst>(U.getUser());
+ CI->replaceAllUsesWith(True);
+ CI->eraseFromParent();
+ }
+ }
}
/// If whole program visibility asserted, then upgrade all public vcall
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 827b25533513..664226ec187b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -597,10 +597,9 @@ public:
/// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
- virtual Value *
- SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts,
- unsigned Depth = 0,
- bool AllowMultipleUsers = false) override;
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts, unsigned Depth = 0,
+ bool AllowMultipleUsers = false) override;
/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldVectorBinop(BinaryOperator &Inst);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cf2754b1dd60..3274e36ab71a 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1232,7 +1232,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
// dynamic alloca instrumentation for them as well.
!AI.isUsedWithInAlloca() &&
// swifterror allocas are register promoted by ISel
- !AI.isSwiftError());
+ !AI.isSwiftError() &&
+ // safe allocas are not interesting
+ !(SSGI && SSGI->isSafe(AI)));
ProcessedAllocas[&AI] = IsInteresting;
return IsInteresting;
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index fd2eaee8b47d..013a119c5096 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -213,10 +213,12 @@ bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
bool LoopDataPrefetch::run() {
// If PrefetchDistance is not set, don't run the pass. This gives an
// opportunity for targets to run this pass for selected subtargets only
- // (whose TTI sets PrefetchDistance).
- if (getPrefetchDistance() == 0)
+ // (whose TTI sets PrefetchDistance and CacheLineSize).
+ if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
+ LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
+ "for loop data prefetch.\n");
return false;
- assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
+ }
bool MadeChange = false;
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index c05906649f16..f1e1359255bd 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -338,6 +338,9 @@ class LowerMatrixIntrinsics {
Value *extractVector(unsigned I, unsigned J, unsigned NumElts,
IRBuilder<> &Builder) const {
Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
+ assert(cast<FixedVectorType>(Vec->getType())->getNumElements() >=
+ NumElts &&
+ "Extracted vector will contain poison values");
return Builder.CreateShuffleVector(
Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
"block");
@@ -1423,13 +1426,13 @@ public:
FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
MatrixTy TileResult;
// Insert in the inner loop header.
- Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
+ Builder.SetInsertPoint(TI.KLoop.Header->getTerminator());
// Create PHI nodes for the result columns to accumulate across iterations.
SmallVector<PHINode *, 4> ColumnPhis;
for (unsigned I = 0; I < TileSize; I++) {
auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
- TI.RowLoopHeader->getSingleSuccessor());
+ TI.RowLoop.Header->getSingleSuccessor());
TileResult.addVector(Phi);
ColumnPhis.push_back(Phi);
}
@@ -1438,27 +1441,29 @@ public:
// Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
Builder.SetInsertPoint(InnerBody->getTerminator());
// Load tiles of the operands.
- MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
- {TileSize, TileSize}, EltType, Builder);
- MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
- {TileSize, TileSize}, EltType, Builder);
+ MatrixTy A =
+ loadMatrix(LPtr, {}, false, LShape, TI.RowLoop.Index, TI.KLoop.Index,
+ {TileSize, TileSize}, EltType, Builder);
+ MatrixTy B =
+ loadMatrix(RPtr, {}, false, RShape, TI.KLoop.Index, TI.ColumnLoop.Index,
+ {TileSize, TileSize}, EltType, Builder);
emitMatrixMultiply(TileResult, A, B, Builder, true, false,
getFastMathFlags(MatMul));
// Store result after the inner loop is done.
- Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
+ Builder.SetInsertPoint(TI.RowLoop.Latch->getTerminator());
storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
- TI.CurrentRow, TI.CurrentCol, EltType, Builder);
+ TI.RowLoop.Index, TI.ColumnLoop.Index, EltType, Builder);
for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
- ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
+ ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.KLoop.Latch);
// Force unrolling of a few iterations of the inner loop, to make sure there
// is enough work per iteration.
// FIXME: The unroller should make this decision directly instead, but
// currently the cost-model is not up to the task.
unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
- addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
+ addStringMetadataToLoop(LI->getLoopFor(TI.KLoop.Header),
"llvm.loop.unroll.count", InnerLoopUnrollCount);
}
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 240fb5e60687..cd2ce8ce336e 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -147,27 +147,27 @@ XorOpnd::XorOpnd(Value *V) {
/// Instruction::isAssociative() because it includes operations like fsub.
/// (This routine is only intended to be called for floating-point operations.)
static bool hasFPAssociativeFlags(Instruction *I) {
- assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops");
+ assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
return I->hasAllowReassoc() && I->hasNoSignedZeros();
}
/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
- auto *I = dyn_cast<Instruction>(V);
- if (I && I->hasOneUse() && I->getOpcode() == Opcode)
- if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
- return cast<BinaryOperator>(I);
+ auto *BO = dyn_cast<BinaryOperator>(V);
+ if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
+ if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
+ return BO;
return nullptr;
}
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
unsigned Opcode2) {
- auto *I = dyn_cast<Instruction>(V);
- if (I && I->hasOneUse() &&
- (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
- if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
- return cast<BinaryOperator>(I);
+ auto *BO = dyn_cast<BinaryOperator>(V);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
+ if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
+ return BO;
return nullptr;
}
@@ -778,7 +778,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
- if (NewOp->getType()->isFPOrFPVectorTy())
+ if (isa<FPMathOperator>(NewOp))
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
@@ -2227,7 +2227,7 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
// Don't optimize floating-point instructions unless they have the
// appropriate FastMathFlags for reassociation enabled.
- if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I))
+ if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 00387ec426bf..878f9477a29d 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -825,6 +825,35 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
}
}
+/// Bundle operands of the inlined function must be added to inlined call sites.
+static void PropagateOperandBundles(Function::iterator InlinedBB,
+ Instruction *CallSiteEHPad) {
+ for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
+ CallBase *I = dyn_cast<CallBase>(&II);
+ if (!I)
+ continue;
+ // Skip call sites which already have a "funclet" bundle.
+ if (I->getOperandBundle(LLVMContext::OB_funclet))
+ continue;
+ // Skip call sites which are nounwind intrinsics (as long as they don't
+ // lower into regular function calls in the course of IR transformations).
+ auto *CalledFn =
+ dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
+ !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
+ continue;
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ I->getOperandBundlesAsDefs(OpBundles);
+ OpBundles.emplace_back("funclet", CallSiteEHPad);
+
+ Instruction *NewInst = CallBase::Create(I, OpBundles, I);
+ NewInst->takeName(I);
+ I->replaceAllUsesWith(NewInst);
+ I->eraseFromParent();
+ }
+}
+
namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
@@ -2304,38 +2333,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Update the lexical scopes of the new funclets and callsites.
// Anything that had 'none' as its parent is now nested inside the callsite's
// EHPad.
-
if (CallSiteEHPad) {
for (Function::iterator BB = FirstNewBlock->getIterator(),
E = Caller->end();
BB != E; ++BB) {
- // Add bundle operands to any top-level call sites.
- SmallVector<OperandBundleDef, 1> OpBundles;
- for (Instruction &II : llvm::make_early_inc_range(*BB)) {
- CallBase *I = dyn_cast<CallBase>(&II);
- if (!I)
- continue;
-
- // Skip call sites which are nounwind intrinsics.
- auto *CalledFn =
- dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
- if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow())
- continue;
-
- // Skip call sites which already have a "funclet" bundle.
- if (I->getOperandBundle(LLVMContext::OB_funclet))
- continue;
-
- I->getOperandBundlesAsDefs(OpBundles);
- OpBundles.emplace_back("funclet", CallSiteEHPad);
-
- Instruction *NewInst = CallBase::Create(I, OpBundles, I);
- NewInst->takeName(I);
- I->replaceAllUsesWith(NewInst);
- I->eraseFromParent();
-
- OpBundles.clear();
- }
+ // Add bundle operands to inlined call sites.
+ PropagateOperandBundles(BB, CallSiteEHPad);
// It is problematic if the inlinee has a cleanupret which unwinds to
// caller and we inline it into a call site which doesn't unwind but into
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index 6a137630deeb..e218773cf5da 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -70,35 +70,35 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End,
IRBuilderBase &B, DomTreeUpdater &DTU,
LoopInfo &LI) {
- Loop *ColLoop = LI.AllocateLoop();
- Loop *RowLoop = LI.AllocateLoop();
- Loop *InnerLoop = LI.AllocateLoop();
- RowLoop->addChildLoop(InnerLoop);
- ColLoop->addChildLoop(RowLoop);
+ Loop *ColumnLoopInfo = LI.AllocateLoop();
+ Loop *RowLoopInfo = LI.AllocateLoop();
+ Loop *KLoopInfo = LI.AllocateLoop();
+ RowLoopInfo->addChildLoop(KLoopInfo);
+ ColumnLoopInfo->addChildLoop(RowLoopInfo);
if (Loop *ParentL = LI.getLoopFor(Start))
- ParentL->addChildLoop(ColLoop);
+ ParentL->addChildLoop(ColumnLoopInfo);
else
- LI.addTopLevelLoop(ColLoop);
+ LI.addTopLevelLoop(ColumnLoopInfo);
BasicBlock *ColBody =
CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize),
- "cols", B, DTU, ColLoop, LI);
- BasicBlock *ColLatch = ColBody->getSingleSuccessor();
+ "cols", B, DTU, ColumnLoopInfo, LI);
+ ColumnLoop.Latch = ColBody->getSingleSuccessor();
BasicBlock *RowBody =
- CreateLoop(ColBody, ColLatch, B.getInt64(NumRows), B.getInt64(TileSize),
- "rows", B, DTU, RowLoop, LI);
- RowLoopLatch = RowBody->getSingleSuccessor();
+ CreateLoop(ColBody, ColumnLoop.Latch, B.getInt64(NumRows),
+ B.getInt64(TileSize), "rows", B, DTU, RowLoopInfo, LI);
+ RowLoop.Latch = RowBody->getSingleSuccessor();
BasicBlock *InnerBody =
- CreateLoop(RowBody, RowLoopLatch, B.getInt64(NumInner),
- B.getInt64(TileSize), "inner", B, DTU, InnerLoop, LI);
- InnerLoopLatch = InnerBody->getSingleSuccessor();
- ColumnLoopHeader = ColBody->getSinglePredecessor();
- RowLoopHeader = RowBody->getSinglePredecessor();
- InnerLoopHeader = InnerBody->getSinglePredecessor();
- CurrentRow = &*RowLoopHeader->begin();
- CurrentCol = &*ColumnLoopHeader->begin();
- CurrentK = &*InnerLoopHeader->begin();
+ CreateLoop(RowBody, RowLoop.Latch, B.getInt64(NumInner),
+ B.getInt64(TileSize), "inner", B, DTU, KLoopInfo, LI);
+ KLoop.Latch = InnerBody->getSingleSuccessor();
+ ColumnLoop.Header = ColBody->getSinglePredecessor();
+ RowLoop.Header = RowBody->getSinglePredecessor();
+ KLoop.Header = InnerBody->getSinglePredecessor();
+ RowLoop.Index = &*RowLoop.Header->begin();
+ ColumnLoop.Index = &*ColumnLoop.Header->begin();
+ KLoop.Index = &*KLoop.Header->begin();
return InnerBody;
}
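For orientation, CreateTiledLoops builds a three-deep nest, and the new ColumnLoop/RowLoop/KLoop members cache each level's header, latch, and induction value in one place instead of in loose fields. The emitted control flow corresponds to roughly this C++ (same names as the surrounding code; a sketch, not literal output of the function):

  for (int64_t Col = 0; Col < NumColumns; Col += TileSize)    // "cols"
    for (int64_t Row = 0; Row < NumRows; Row += TileSize)     // "rows"
      for (int64_t K = 0; K < NumInner; K += TileSize)        // "inner"
        ; // InnerBody: the caller emits the tile computation here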
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index bca3b0538c5d..03087d8370d5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -75,39 +75,109 @@ static bool callHasFP128Argument(const CallInst *CI) {
});
}
-static Value *convertStrToNumber(CallInst *CI, StringRef &Str, Value *EndPtr,
- int64_t Base, IRBuilderBase &B) {
+// Convert the entire string Str representing an integer in Base, up to
+// the terminating nul if present, to a constant according to the rules
+// of strtoul[l] or, when AsSigned is set, of strtol[l]. On success
+// return the result, otherwise null.
+// The function assumes the string is encoded in ASCII and carefully
+// avoids converting sequences (including "") for which the corresponding
+// library call might fail and set errno.
+static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
+ uint64_t Base, bool AsSigned, IRBuilderBase &B) {
if (Base < 2 || Base > 36)
- // handle special zero base
if (Base != 0)
+ // Fail for an invalid base (required by POSIX).
return nullptr;
- char *End;
- std::string nptr = Str.str();
- errno = 0;
- long long int Result = strtoll(nptr.c_str(), &End, Base);
- if (errno)
- return nullptr;
+ // Strip leading whitespace.
+ for (unsigned i = 0; i != Str.size(); ++i)
+ if (!isSpace((unsigned char)Str[i])) {
+ Str = Str.substr(i);
+ break;
+ }
- // if we assume all possible target locales are ASCII supersets,
- // then if strtoll successfully parses a number on the host,
- // it will also successfully parse the same way on the target
- if (*End != '\0')
+ if (Str.empty())
+ // Fail for empty subject sequences (POSIX allows but doesn't require
+ // strtol[l]/strtoul[l] to fail with EINVAL).
return nullptr;
- if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result))
- return nullptr;
+ // Strip but remember the sign.
+ bool Negate = Str[0] == '-';
+ if (Str[0] == '-' || Str[0] == '+') {
+ Str = Str.drop_front();
+ if (Str.empty())
+ // Fail for a sign with nothing after it.
+ return nullptr;
+ }
+
+  // Set Max to the largest magnitude representable in the type: the
+  // absolute value of the type's minimum for negated signed results,
+  // otherwise its signed or unsigned maximum.
+ Type *RetTy = CI->getType();
+ unsigned NBits = RetTy->getPrimitiveSizeInBits();
+ uint64_t Max = AsSigned && Negate ? 1 : 0;
+ Max += AsSigned ? maxIntN(NBits) : maxUIntN(NBits);
+
+ // Autodetect Base if it's zero and consume the "0x" prefix.
+ if (Str.size() > 1) {
+ if (Str[0] == '0') {
+ if (toUpper((unsigned char)Str[1]) == 'X') {
+ if (Str.size() == 2 || (Base && Base != 16))
+          // Fail if Base doesn't allow the "0x" prefix, or for the bare
+          // prefix alone, which implementations like BSD reject with EINVAL.
+ return nullptr;
+
+ Str = Str.drop_front(2);
+ Base = 16;
+      } else if (Base == 0)
+        Base = 8;
+    } else if (Base == 0)
+      Base = 10;
+  } else if (Base == 0)
+    Base = 10;
+
+ // Convert the rest of the subject sequence, not including the sign,
+ // to its uint64_t representation (this assumes the source character
+ // set is ASCII).
+ uint64_t Result = 0;
+ for (unsigned i = 0; i != Str.size(); ++i) {
+ unsigned char DigVal = Str[i];
+ if (isDigit(DigVal))
+ DigVal = DigVal - '0';
+ else {
+ DigVal = toUpper(DigVal);
+ if (isAlpha(DigVal))
+ DigVal = DigVal - 'A' + 10;
+ else
+ return nullptr;
+ }
+
+ if (DigVal >= Base)
+ // Fail if the digit is not valid in the Base.
+ return nullptr;
+
+ // Add the digit and fail if the result is not representable in
+ // the (unsigned form of the) destination type.
+ bool VFlow;
+ Result = SaturatingMultiplyAdd(Result, Base, (uint64_t)DigVal, &VFlow);
+ if (VFlow || Result > Max)
+ return nullptr;
+ }
if (EndPtr) {
// Store the pointer to the end.
- uint64_t ILen = End - nptr.c_str();
- Value *Off = B.getInt64(ILen);
+ Value *Off = B.getInt64(Str.size());
Value *StrBeg = CI->getArgOperand(0);
Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
B.CreateStore(StrEnd, EndPtr);
}
- return ConstantInt::get(CI->getType(), Result);
+ if (Negate)
+ // Unsigned negation doesn't overflow.
+ Result = -Result;
+
+ return ConstantInt::get(RetTy, Result);
}
static bool isOnlyUsedInComparisonWithZero(Value *V) {
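The per-digit accumulation leans on llvm::SaturatingMultiplyAdd from llvm/Support/MathExtras.h, which computes X*Y+A but reports overflow instead of wrapping. A minimal sketch of those semantics, assuming the GCC/Clang overflow builtins rather than the real MathExtras implementation:

  // Result*Base + DigVal, saturating to UINT64_MAX (from <cstdint>) and
  // reporting the overflow through *Ovf.
  static uint64_t mulAddSat(uint64_t X, uint64_t Y, uint64_t A, bool *Ovf) {
    uint64_t R;
    *Ovf = __builtin_mul_overflow(X, Y, &R) ||
           __builtin_add_overflow(R, A, &R);
    return *Ovf ? UINT64_MAX : R;
  }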
@@ -2531,27 +2601,35 @@ Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
ConstantInt::get(CI->getType(), 0x7F));
}
+// Fold calls to atoi, atol, and atoll.
Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
+ CI->addParamAttr(0, Attribute::NoCapture);
+
StringRef Str;
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
- return convertStrToNumber(CI, Str, nullptr, 10, B);
+ return convertStrToInt(CI, Str, nullptr, 10, /*AsSigned=*/true, B);
}
-Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) {
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return nullptr;
-
+// Fold calls to strtol, strtoll, strtoul, and strtoull.
+Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
+ bool AsSigned) {
Value *EndPtr = CI->getArgOperand(1);
- if (isa<ConstantPointerNull>(EndPtr))
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addParamAttr(0, Attribute::NoCapture);
EndPtr = nullptr;
- else if (!isKnownNonZero(EndPtr, DL))
+ } else if (!isKnownNonZero(EndPtr, DL))
+ return nullptr;
+
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
- return convertStrToNumber(CI, Str, EndPtr, CInt->getSExtValue(), B);
+ return convertStrToInt(CI, Str, EndPtr, CInt->getSExtValue(), AsSigned, B);
}
return nullptr;
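The net effect on a few hypothetical call sites (illustrative, not tests from this patch): a call whose string, base, and end-pointer arguments are all known folds to a constant, while any sequence a libc might set errno for is left alone.

  long a = strtol("  -42", nullptr, 10);         // folds to the constant -42
  unsigned long b = strtoul("0x2a", nullptr, 0); // base autodetected; folds to 42
  unsigned long c = strtoul("0x", nullptr, 16);  // bare prefix: BSD libcs set
                                                 // EINVAL, so the call is kept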
@@ -3390,7 +3468,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
return optimizeAtoi(CI, Builder);
case LibFunc_strtol:
case LibFunc_strtoll:
- return optimizeStrtol(CI, Builder);
+ return optimizeStrToInt(CI, Builder, /*AsSigned=*/true);
+ case LibFunc_strtoul:
+ case LibFunc_strtoull:
+ return optimizeStrToInt(CI, Builder, /*AsSigned=*/false);
case LibFunc_printf:
return optimizePrintF(CI, Builder);
case LibFunc_sprintf:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b887ea41676b..238b074089aa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -798,8 +798,7 @@ public:
// Override this function to handle the more complex control flow around the
// three loops.
- std::pair<BasicBlock *, Value *>
- createVectorizedLoopSkeleton() final override {
+ std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton() final {
return createEpilogueVectorizedLoopSkeleton();
}
@@ -835,8 +834,7 @@ public:
EPI, LVL, CM, BFI, PSI, Check) {}
/// Implements the interface for creating a vectorized skeleton using the
  /// *main loop* strategy (i.e. the first pass of vplan execution).
- std::pair<BasicBlock *, Value *>
- createEpilogueVectorizedLoopSkeleton() final override;
+ std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton() final;
protected:
/// Emits an iteration count bypass check once for the main loop (when \p
@@ -866,8 +864,7 @@ public:
}
/// Implements the interface for creating a vectorized skeleton using the
  /// *epilogue loop* strategy (i.e. the second pass of vplan execution).
- std::pair<BasicBlock *, Value *>
- createEpilogueVectorizedLoopSkeleton() final override;
+ std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton() final;
protected:
/// Emits an iteration count bypass check after the main vector loop has
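The virt-specifier cleanup is purely syntactic: 'final' is only valid on a virtual member function, so a 'final' function whose signature matches a base-class virtual necessarily overrides it, and a trailing 'override' adds nothing. A hand-written illustration (not from the patch):

  struct Base { virtual void skeleton(); };
  struct Derived : Base {
    void skeleton() final; // already an implicit override; spelling
                           // 'final override' states the same thing twice
  };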
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cd044c78d900..d69d1e3d19f3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10972,9 +10972,7 @@ public:
It != E; ++It) {
PossibleRedValsVect.emplace_back();
auto RedValsVect = It->second.takeVector();
- stable_sort(RedValsVect, [](const auto &P1, const auto &P2) {
- return P1.second < P2.second;
- });
+ stable_sort(RedValsVect, llvm::less_second());
for (const std::pair<Value *, unsigned> &Data : RedValsVect)
PossibleRedValsVect.back().append(Data.second, Data.first);
}
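llvm::less_second (llvm/ADT/STLExtras.h) is a function object that orders pair-like values by their .second member, which is exactly what the deleted lambda spelled out by hand. Roughly (a sketch, not the STLExtras definition verbatim):

  struct less_second {
    template <typename T>
    bool operator()(const T &L, const T &R) const {
      return L.second < R.second;
    }
  };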