aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-06-03 15:20:36 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-06-03 15:20:36 +0000
commitd288ef4c1788d3a951a7558c68312c2d320612b1 (patch)
treeece909a5200f95f85f0813599a9500620f4d9217 /lib
parentf382538d471e38a9b98f016c4caebd24c8d60b62 (diff)
downloadsrc-d288ef4c1788d3a951a7558c68312c2d320612b1.tar.gz
src-d288ef4c1788d3a951a7558c68312c2d320612b1.zip
Notes
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/ConstantFolding.cpp7
-rw-r--r--lib/Analysis/IndirectCallPromotionAnalysis.cpp2
-rw-r--r--lib/Analysis/InlineCost.cpp132
-rw-r--r--lib/Analysis/LazyValueInfo.cpp69
-rw-r--r--lib/Analysis/ModuleSummaryAnalysis.cpp12
-rw-r--r--lib/Analysis/OrderedBasicBlock.cpp2
-rw-r--r--lib/Analysis/RegionPass.cpp16
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp6
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp113
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp4
-rw-r--r--lib/CodeGen/CodeGen.cpp1
-rw-r--r--lib/CodeGen/GlobalMerge.cpp4
-rw-r--r--lib/CodeGen/LivePhysRegs.cpp3
-rw-r--r--lib/CodeGen/LiveRegUnits.cpp56
-rw-r--r--lib/CodeGen/MachineRegionInfo.cpp46
-rw-r--r--lib/CodeGen/MachineVerifier.cpp43
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp100
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp61
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp40
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp126
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp80
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp121
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp46
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp36
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp2
-rw-r--r--lib/DebugInfo/CodeView/CodeViewRecordIO.cpp14
-rw-r--r--lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp6
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp32
-rw-r--r--lib/DebugInfo/CodeView/SymbolDumper.cpp4
-rw-r--r--lib/DebugInfo/CodeView/SymbolRecordMapping.cpp1
-rw-r--r--lib/DebugInfo/CodeView/SymbolSerializer.cpp8
-rw-r--r--lib/DebugInfo/MSF/MappedBlockStream.cpp63
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp54
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStream.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp28
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFile.cpp9
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp8
-rw-r--r--lib/DebugInfo/PDB/Native/PDBStringTable.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStream.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp8
-rw-r--r--lib/IR/DIBuilder.cpp33
-rw-r--r--lib/IR/DebugLoc.cpp81
-rw-r--r--lib/IR/OptBisect.cpp15
-rw-r--r--lib/LTO/LTO.cpp43
-rw-r--r--lib/LTO/LTOBackend.cpp7
-rw-r--r--lib/LTO/ThinLTOCodeGenerator.cpp16
-rw-r--r--lib/MC/WasmObjectWriter.cpp906
-rw-r--r--lib/ObjectYAML/CodeViewYAMLDebugSections.cpp407
-rw-r--r--lib/ObjectYAML/CodeViewYAMLSymbols.cpp12
-rw-r--r--lib/Passes/PassBuilder.cpp6
-rw-r--r--lib/Support/Triple.cpp6
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.h7
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkorDetails.td41
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td7
-rw-r--r--lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp10
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp5
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h14
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp3
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp21
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/SIInsertWaitcnts.cpp3
-rw-r--r--lib/Target/AMDGPU/SIInsertWaits.cpp2
-rw-r--r--lib/Target/AMDGPU/SMInstructions.td1
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td3
-rw-r--r--lib/Target/ARM/ARM.td21
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp83
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h18
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp14
-rw-r--r--lib/Target/ARM/ARMSchedule.td4
-rw-r--r--lib/Target/ARM/ARMScheduleA57.td1471
-rw-r--r--lib/Target/ARM/ARMScheduleA57WriteRes.td323
-rw-r--r--lib/Target/ARM/ARMSubtarget.h5
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp4
-rw-r--r--lib/Target/Mips/MicroMipsSizeReduction.cpp57
-rw-r--r--lib/Target/WebAssembly/known_gcc_test_failures.txt3
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp135
-rw-r--r--lib/Transforms/Coroutines/CoroSplit.cpp2
-rw-r--r--lib/Transforms/Coroutines/Coroutines.cpp12
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp4
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp107
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp3
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp66
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp11
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp1
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp181
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp1
-rw-r--r--lib/Transforms/Scalar/LowerExpectIntrinsic.cpp162
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp10
-rw-r--r--lib/Transforms/Scalar/SROA.cpp7
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp71
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp2
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp151
99 files changed, 4411 insertions, 1564 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 6a1af87450c9..a906770dbb34 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1170,7 +1170,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp null, (inttoptr x) -> icmp 0, x
// fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp 0, (ptrtoint x) -> icmp null, x
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
@@ -1240,6 +1242,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
}
+ } else if (isa<ConstantExpr>(Ops1)) {
+ // If RHS is a constant expression, but the left side isn't, swap the
+ // operands and try again.
+ Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate);
+ return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
}
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 3da33ac71421..ed233d201537 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -43,7 +43,7 @@ static cl::opt<unsigned>
// The percent threshold for the direct-call target (this call site vs the
// total call count) for it to be considered as the promotion target.
static cl::opt<unsigned>
- ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
+ ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden,
cl::ZeroOrMore,
cl::desc("The percentage threshold for the promotion"));
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 4702569126c6..77c87928728a 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -54,11 +54,6 @@ static cl::opt<int>
cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
-static cl::opt<bool>
- EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden,
- cl::init(false),
- cl::desc("Enable generic switch cost model"));
-
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
@@ -1015,83 +1010,68 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
if (isa<ConstantInt>(V))
return true;
- if (EnableGenericSwitchCost) {
- // Assume the most general case where the swith is lowered into
- // either a jump table, bit test, or a balanced binary tree consisting of
- // case clusters without merging adjacent clusters with the same
- // destination. We do not consider the switches that are lowered with a mix
- // of jump table/bit test/binary search tree. The cost of the switch is
- // proportional to the size of the tree or the size of jump table range.
-
- // Exit early for a large switch, assuming one case needs at least one
- // instruction.
- // FIXME: This is not true for a bit test, but ignore such case for now to
- // save compile-time.
- int64_t CostLowerBound =
- std::min((int64_t)INT_MAX,
- (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
-
- if (CostLowerBound > Threshold) {
- Cost = CostLowerBound;
- return false;
- }
+ // Assume the most general case where the swith is lowered into
+ // either a jump table, bit test, or a balanced binary tree consisting of
+ // case clusters without merging adjacent clusters with the same
+ // destination. We do not consider the switches that are lowered with a mix
+ // of jump table/bit test/binary search tree. The cost of the switch is
+ // proportional to the size of the tree or the size of jump table range.
+ //
+ // NB: We convert large switches which are just used to initialize large phi
+ // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
+ // inlining those. It will prevent inlining in cases where the optimization
+ // does not (yet) fire.
- unsigned JumpTableSize = 0;
- unsigned NumCaseCluster =
- TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+ // Exit early for a large switch, assuming one case needs at least one
+ // instruction.
+ // FIXME: This is not true for a bit test, but ignore such case for now to
+ // save compile-time.
+ int64_t CostLowerBound =
+ std::min((int64_t)INT_MAX,
+ (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
- // If suitable for a jump table, consider the cost for the table size and
- // branch to destination.
- if (JumpTableSize) {
- int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
- 4 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
- return false;
- }
+ if (CostLowerBound > Threshold) {
+ Cost = CostLowerBound;
+ return false;
+ }
- // Considering forming a binary search, we should find the number of nodes
- // which is same as the number of comparisons when lowered. For a given
- // number of clusters, n, we can define a recursive function, f(n), to find
- // the number of nodes in the tree. The recursion is :
- // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
- // and f(n) = n, when n <= 3.
- // This will lead a binary tree where the leaf should be either f(2) or f(3)
- // when n > 3. So, the number of comparisons from leaves should be n, while
- // the number of non-leaf should be :
- // 2^(log2(n) - 1) - 1
- // = 2^log2(n) * 2^-1 - 1
- // = n / 2 - 1.
- // Considering comparisons from leaf and non-leaf nodes, we can estimate the
- // number of comparisons in a simple closed form :
- // n + n / 2 - 1 = n * 3 / 2 - 1
- if (NumCaseCluster <= 3) {
- // Suppose a comparison includes one compare and one conditional branch.
- Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
- return false;
- }
- int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
- uint64_t SwitchCost =
- ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
- Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
+ unsigned JumpTableSize = 0;
+ unsigned NumCaseCluster =
+ TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+
+ // If suitable for a jump table, consider the cost for the table size and
+ // branch to destination.
+ if (JumpTableSize) {
+ int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
+ 4 * InlineConstants::InstrCost;
+ Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
return false;
}
- // Use a simple switch cost model where we accumulate a cost proportional to
- // the number of distinct successor blocks. This fan-out in the CFG cannot
- // be represented for free even if we can represent the core switch as a
- // jumptable that takes a single instruction.
- ///
- // NB: We convert large switches which are just used to initialize large phi
- // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
- // inlining those. It will prevent inlining in cases where the optimization
- // does not (yet) fire.
- SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
- SuccessorBlocks.insert(SI.getDefaultDest());
- for (auto Case : SI.cases())
- SuccessorBlocks.insert(Case.getCaseSuccessor());
- // Add cost corresponding to the number of distinct destinations. The first
- // we model as free because of fallthrough.
- Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+ // Considering forming a binary search, we should find the number of nodes
+ // which is same as the number of comparisons when lowered. For a given
+ // number of clusters, n, we can define a recursive function, f(n), to find
+ // the number of nodes in the tree. The recursion is :
+ // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
+ // and f(n) = n, when n <= 3.
+ // This will lead a binary tree where the leaf should be either f(2) or f(3)
+ // when n > 3. So, the number of comparisons from leaves should be n, while
+ // the number of non-leaf should be :
+ // 2^(log2(n) - 1) - 1
+ // = 2^log2(n) * 2^-1 - 1
+ // = n / 2 - 1.
+ // Considering comparisons from leaf and non-leaf nodes, we can estimate the
+ // number of comparisons in a simple closed form :
+ // n + n / 2 - 1 = n * 3 / 2 - 1
+ if (NumCaseCluster <= 3) {
+ // Suppose a comparison includes one compare and one conditional branch.
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+ return false;
+ }
+ int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
+ uint64_t SwitchCost =
+ ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+ Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
return false;
}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index a2b9015a8a1d..6a9ae6440ace 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -662,13 +662,13 @@ namespace {
bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
BasicBlock *BB);
- bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI,
+ bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
- bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
+ bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI,
BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
LVILatticeVal &BBLV,
- Instruction *BBI);
+ Instruction *BBI);
void solve();
@@ -849,12 +849,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
return true;
}
if (BBI->getType()->isIntegerTy()) {
- if (isa<CastInst>(BBI))
- return solveBlockValueCast(Res, BBI, BB);
-
+ if (auto *CI = dyn_cast<CastInst>(BBI))
+ return solveBlockValueCast(Res, CI, BB);
+
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
if (BO && isa<ConstantInt>(BO->getOperand(1)))
- return solveBlockValueBinaryOp(Res, BBI, BB);
+ return solveBlockValueBinaryOp(Res, BO, BB);
}
DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -1168,9 +1168,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
}
bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
- Instruction *BBI,
- BasicBlock *BB) {
- if (!BBI->getOperand(0)->getType()->isSized()) {
+ CastInst *CI,
+ BasicBlock *BB) {
+ if (!CI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
BBLV = LVILatticeVal::getOverdefined();
@@ -1180,7 +1180,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Filter out casts we don't know how to reason about before attempting to
// recurse on our operand. This can cut a long search short if we know we're
// not going to be able to get any useful information anways.
- switch (BBI->getOpcode()) {
+ switch (CI->getOpcode()) {
case Instruction::Trunc:
case Instruction::SExt:
case Instruction::ZExt:
@@ -1197,44 +1197,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- if (!hasBlockValue(BBI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ if (!hasBlockValue(CI->getOperand(0), BB))
+ if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
// More work to do before applying this transfer rule.
return false;
const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+ DL.getTypeSizeInBits(CI->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BBI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
- BBI);
+ if (hasBlockValue(CI->getOperand(0), BB)) {
+ LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
+ CI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
- const unsigned ResultBitWidth =
- cast<IntegerType>(BBI->getType())->getBitWidth();
+ const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
// more definitions.
- auto CastOp = (Instruction::CastOps) BBI->getOpcode();
- BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
+ BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(),
+ ResultBitWidth));
return true;
}
bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
- Instruction *BBI,
+ BinaryOperator *BO,
BasicBlock *BB) {
- assert(BBI->getOperand(0)->getType()->isSized() &&
+ assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
// Filter out operators we don't know how to reason about before attempting to
// recurse on our operand(s). This can cut a long search short if we know
- // we're not going to be able to get any useful information anways.
- switch (BBI->getOpcode()) {
+ // we're not going to be able to get any useful information anyways.
+ switch (BO->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1256,29 +1255,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Figure out the range of the LHS. If that fails, use a conservative range,
// but apply the transfer rule anyways. This lets us pick up facts from
// expressions like "and i32 (call i32 @foo()), 32"
- if (!hasBlockValue(BBI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ if (!hasBlockValue(BO->getOperand(0), BB))
+ if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
// More work to do before applying this transfer rule.
return false;
const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+ DL.getTypeSizeInBits(BO->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BBI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
- BBI);
+ if (hasBlockValue(BO->getOperand(0), BB)) {
+ LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
+ BO);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
- ConstantInt *RHS = cast<ConstantInt>(BBI->getOperand(1));
+ ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
ConstantRange RHSRange = ConstantRange(RHS->getValue());
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
// more definitions.
- auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+ Instruction::BinaryOps BinOp = BO->getOpcode();
BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 26706f5509ba..3253f27c010d 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -275,7 +275,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// FIXME: refactor this to use the same code that inliner is using.
F.isVarArg();
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
@@ -295,7 +295,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
if (NonRenamableLocal)
@@ -308,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
@@ -323,7 +323,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
for (auto &Summary : VI.getSummaryList())
- Summary->setLiveRoot();
+ Summary->setLive(true);
}
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
@@ -423,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
return;
assert(GV->isDeclaration() && "Def in module asm already has definition");
GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
- /* NotEligibleToImport */ true,
- /* LiveRoot */ true);
+ /* NotEligibleToImport = */ true,
+ /* Live = */ true);
CantBePromoted.insert(GlobalValue::getGUID(Name));
// Create the appropriate summary type.
if (isa<Function>(GV)) {
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 0f0016f22cc0..a04c0aef04be 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
assert(II != IE && "Instruction not found?");
assert((Inst == A || Inst == B) && "Should find A or B");
LastInstFound = II;
- return Inst == A;
+ return Inst != B;
}
/// \brief Find out whether \p A dominates \p B, meaning whether \p A
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 82107cb18025..b38e6225c840 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/OptBisect.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -280,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
return new PrintRegionPass(Banner, O);
}
+
+bool RegionPass::skipRegion(Region &R) const {
+ Function &F = *R.getEntry()->getParent();
+ if (!F.getContext().getOptBisect().shouldRunPass(this, R))
+ return true;
+
+ if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ // Report this only once per function.
+ if (R.getEntry() == &F.getEntryBlock())
+ DEBUG(dbgs() << "Skipping pass '" << getPassName()
+ << "' on function " << F.getName() << "\n");
+ return true;
+ }
+ return false;
+}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 686c94687669..fffa9045b2fd 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -865,11 +865,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits
RawFlags = RawFlags >> 4;
bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3;
- // The LiveRoot flag wasn't introduced until version 3. For dead stripping
+ // The Live flag wasn't introduced until version 3. For dead stripping
// to work correctly on earlier versions, we must conservatively treat all
// values as live.
- bool LiveRoot = (RawFlags & 0x2) || Version < 3;
- return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot);
+ bool Live = (RawFlags & 0x2) || Version < 3;
+ return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live);
}
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index a402b4ddd462..9043b8c12d25 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -351,7 +351,8 @@ public:
/// Calls the callback for each value GUID and summary to be written to
/// bitcode. This hides the details of whether they are being pulled from the
/// entire index or just those in a provided ModuleToSummariesForIndex map.
- void forEachSummary(std::function<void(GVInfo)> Callback) {
+ template<typename Functor>
+ void forEachSummary(Functor Callback) {
if (ModuleToSummariesForIndex) {
for (auto &M : *ModuleToSummariesForIndex)
for (auto &Summary : M.second)
@@ -363,6 +364,29 @@ public:
}
}
+ /// Calls the callback for each entry in the modulePaths StringMap that
+ /// should be written to the module path string table. This hides the details
+ /// of whether they are being pulled from the entire index or just those in a
+ /// provided ModuleToSummariesForIndex map.
+ template <typename Functor> void forEachModule(Functor Callback) {
+ if (ModuleToSummariesForIndex) {
+ for (const auto &M : *ModuleToSummariesForIndex) {
+ const auto &MPI = Index.modulePaths().find(M.first);
+ if (MPI == Index.modulePaths().end()) {
+ // This should only happen if the bitcode file was empty, in which
+ // case we shouldn't be importing (the ModuleToSummariesForIndex
+ // would only include the module we are writing and index for).
+ assert(ModuleToSummariesForIndex->size() == 1);
+ continue;
+ }
+ Callback(*MPI);
+ }
+ } else {
+ for (const auto &MPSE : Index.modulePaths())
+ Callback(MPSE);
+ }
+ }
+
/// Main entry point for writing a combined index to bitcode.
void write();
@@ -370,14 +394,6 @@ private:
void writeModStrings();
void writeCombinedGlobalValueSummary();
- /// Indicates whether the provided \p ModulePath should be written into
- /// the module string table, e.g. if full index written or if it is in
- /// the provided subset.
- bool doIncludeModule(StringRef ModulePath) {
- return !ModuleToSummariesForIndex ||
- ModuleToSummariesForIndex->count(ModulePath);
- }
-
Optional<unsigned> getValueId(GlobalValue::GUID ValGUID) {
auto VMI = GUIDToValueIdMap.find(ValGUID);
if (VMI == GUIDToValueIdMap.end())
@@ -864,7 +880,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;
RawFlags |= Flags.NotEligibleToImport; // bool
- RawFlags |= (Flags.LiveRoot << 1);
+ RawFlags |= (Flags.Live << 1);
// Linkage don't need to be remapped at that time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
// account here as well.
@@ -968,19 +984,18 @@ void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() {
enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
/// Determine the encoding to use for the given string name and length.
-static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
+static StringEncoding getStringEncoding(StringRef Str) {
bool isChar6 = true;
- for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
+ for (char C : Str) {
if (isChar6)
- isChar6 = BitCodeAbbrevOp::isChar6(*C);
- if ((unsigned char)*C & 128)
+ isChar6 = BitCodeAbbrevOp::isChar6(C);
+ if ((unsigned char)C & 128)
// don't bother scanning the rest.
return SE_Fixed8;
}
if (isChar6)
return SE_Char6;
- else
- return SE_Fixed7;
+ return SE_Fixed7;
}
/// Emit top-level description of module, including target triple, inline asm,
@@ -1073,8 +1088,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
SmallVector<unsigned, 64> Vals;
// Emit the module's source file name.
{
- StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(),
- M.getSourceFileName().size());
+ StringEncoding Bits = getStringEncoding(M.getSourceFileName());
BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
if (Bits == SE_Char6)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
@@ -2790,8 +2804,7 @@ void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable(
for (const ValueName &Name : VST) {
// Figure out the encoding to use for the name.
- StringEncoding Bits =
- getStringEncoding(Name.getKeyData(), Name.getKeyLength());
+ StringEncoding Bits = getStringEncoding(Name.getKey());
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
NameVals.push_back(VE.getValueID(Name.getValue()));
@@ -3149,41 +3162,33 @@ void IndexBitcodeWriter::writeModStrings() {
unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
- for (const auto &MPSE : Index.modulePaths()) {
- if (!doIncludeModule(MPSE.getKey()))
- continue;
- StringEncoding Bits =
- getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
- unsigned AbbrevToUse = Abbrev8Bit;
- if (Bits == SE_Char6)
- AbbrevToUse = Abbrev6Bit;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = Abbrev7Bit;
-
- Vals.push_back(MPSE.getValue().first);
-
- for (const auto P : MPSE.getKey())
- Vals.push_back((unsigned char)P);
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
-
- Vals.clear();
- // Emit an optional hash for the module now
- auto &Hash = MPSE.getValue().second;
- bool AllZero = true; // Detect if the hash is empty, and do not generate it
- for (auto Val : Hash) {
- if (Val)
- AllZero = false;
- Vals.push_back(Val);
- }
- if (!AllZero) {
- // Emit the hash record.
- Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
- }
+ forEachModule(
+ [&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) {
+ StringRef Key = MPSE.getKey();
+ const auto &Value = MPSE.getValue();
+ StringEncoding Bits = getStringEncoding(Key);
+ unsigned AbbrevToUse = Abbrev8Bit;
+ if (Bits == SE_Char6)
+ AbbrevToUse = Abbrev6Bit;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = Abbrev7Bit;
+
+ Vals.push_back(Value.first);
+ Vals.append(Key.begin(), Key.end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
+
+ // Emit an optional hash for the module now
+ const auto &Hash = Value.second;
+ if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
+ Vals.assign(Hash.begin(), Hash.end());
+ // Emit the hash record.
+ Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
+ }
- Vals.clear();
- }
+ Vals.clear();
+ });
Stream.ExitBlock();
}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 20e1467b30c3..c2ad9db81cfd 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -194,6 +194,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// some variables.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Ignore call instructions that claim to clobber SP. The AArch64
+ // backend does this for aggregate function arguments.
+ if (MI.isCall() && MO.getReg() == SP)
+ continue;
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
if (TRI->isVirtualRegister(MO.getReg()))
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 4d30c6574b12..256a0c95d365 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -77,6 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
+ initializeRABasicPass(Registry);
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 23812a2a2344..3603f9b7ed93 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -556,6 +556,10 @@ bool GlobalMerge::doInitialization(Module &M) {
if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
continue;
+ // It's not safe to merge globals that may be preempted
+ if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
+ continue;
+
if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
!GV.hasInternalLinkage())
continue;
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 0dc1079b2ad4..cde6ccd29dfd 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -198,13 +198,12 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
if (!MBB.succ_empty()) {
- const MachineFunction &MF = *MBB.getParent();
addPristines(*this, MF);
addLiveOutsNoPristines(MBB);
} else if (MBB.isReturnBlock()) {
// For the return block: Add all callee saved registers.
- const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid())
addCalleeSavedRegs(*this, MF);
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index dff555f49565..3746b74e0528 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveRegUnits.h"
+
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -81,46 +83,50 @@ void LiveRegUnits::accumulateBackward(const MachineInstr &MI) {
}
/// Add live-in registers of basic block \p MBB to \p LiveUnits.
-static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
+static void addBlockLiveIns(LiveRegUnits &LiveUnits,
+ const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins())
LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask);
}
-static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addLiveIns(LiveUnits, *Succ);
+/// Adds all callee saved registers to \p LiveUnits.
+static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
+ const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
+ LiveUnits.addReg(*CSR);
}
-/// Add pristine registers to the given \p LiveUnits. This function removes
-/// actually saved callee save registers when \p InPrologueEpilogue is false.
-static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF,
- const MachineFrameInfo &MFI,
- const TargetRegisterInfo &TRI) {
+/// Adds pristine registers to the given \p LiveUnits. Pristine registers are
+/// callee saved registers that are unused in the function.
+static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+ /// Add all callee saved regs, then remove the ones that are saved+restored.
+ addCalleeSavedRegs(LiveUnits, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveUnits.removeReg(Info.getReg());
}
void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid()) {
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
- addReg(*I);
- if (!MBB.isReturnBlock())
- removeSavedRegs(*this, MF, MFI, *TRI);
+ if (!MBB.succ_empty()) {
+ addPristines(*this, MF);
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*this, *Succ);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addCalleeSavedRegs(*this, MF);
}
- ::addLiveOuts(*this, MBB);
}
void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid()) {
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
- addReg(*I);
- if (&MBB != &MF.front())
- removeSavedRegs(*this, MF, MFI, *TRI);
- }
- ::addLiveIns(*this, MBB);
+ addPristines(*this, MF);
+ addBlockLiveIns(*this, MBB);
}
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 71ad4e6aa7f5..2402ffdbbcb1 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,7 +1,19 @@
-#include "llvm/CodeGen/MachineRegionInfo.h"
+//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegionInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "machine-region-info"
@@ -11,36 +23,29 @@ STATISTIC(numMachineRegions, "The # of machine regions");
STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
namespace llvm {
+
template class RegionBase<RegionTraits<MachineFunction>>;
template class RegionNodeBase<RegionTraits<MachineFunction>>;
template class RegionInfoBase<RegionTraits<MachineFunction>>;
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// MachineRegion implementation
-//
MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
MachineRegionInfo* RI,
MachineDominatorTree *DT, MachineRegion *Parent) :
- RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {}
-}
-
-MachineRegion::~MachineRegion() { }
+MachineRegion::~MachineRegion() = default;
//===----------------------------------------------------------------------===//
// MachineRegionInfo implementation
-//
-
-MachineRegionInfo::MachineRegionInfo() :
- RegionInfoBase<RegionTraits<MachineFunction>>() {
-
-}
-MachineRegionInfo::~MachineRegionInfo() {
+MachineRegionInfo::MachineRegionInfo() = default;
-}
+MachineRegionInfo::~MachineRegionInfo() = default;
void MachineRegionInfo::updateStatistics(MachineRegion *R) {
++numMachineRegions;
@@ -73,9 +78,7 @@ MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-MachineRegionInfoPass::~MachineRegionInfoPass() {
-
-}
+MachineRegionInfoPass::~MachineRegionInfoPass() = default;
bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
releaseMemory();
@@ -137,8 +140,9 @@ INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
// the link time optimization.
namespace llvm {
- FunctionPass *createMachineRegionInfoPass() {
- return new MachineRegionInfoPass();
- }
+
+FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
}
+} // end namespace llvm
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 265f93c363ca..f6dbf667cf02 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -909,17 +910,43 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
- // Generic loads and stores must have a single MachineMemOperand
- // describing that access.
- if ((MI->getOpcode() == TargetOpcode::G_LOAD ||
- MI->getOpcode() == TargetOpcode::G_STORE) &&
- !MI->hasOneMemOperand())
- report("Generic instruction accessing memory must have one mem operand",
- MI);
-
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+ switch(MI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ // Generic loads and stores must have a single MachineMemOperand
+ // describing that access.
+ if (!MI->hasOneMemOperand())
+ report("Generic instruction accessing memory must have one mem operand",
+ MI);
+ break;
+ case TargetOpcode::STATEPOINT:
+ if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
+ !MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
+ !MI->getOperand(StatepointOpers::NCallArgsPos).isImm())
+ report("meta operands to STATEPOINT not constant!", MI);
+ break;
+
+ auto VerifyStackMapConstant = [&](unsigned Offset) {
+ if (!MI->getOperand(Offset).isImm() ||
+ MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
+ !MI->getOperand(Offset + 1).isImm())
+ report("stack map constant to STATEPOINT not well formed!", MI);
+ };
+ const unsigned VarStart = StatepointOpers(MI).getVarIdx();
+ VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset);
+ VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset);
+ VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
+
+ // TODO: verify we have properly encoded deopt arguments
+
+ };
}
void
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index a9813e534c5f..e9f8d43fe643 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -54,8 +54,6 @@ static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks);
-static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS);
-
namespace {
class PEI : public MachineFunctionPass {
public:
@@ -84,7 +82,7 @@ private:
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks)>
SpillCalleeSavedRegisters;
- std::function<void(MachineFunction &MF, RegScavenger *RS)>
+ std::function<void(MachineFunction &MF, RegScavenger &RS)>
ScavengeFrameVirtualRegs;
bool UsesCalleeSaves = false;
@@ -142,7 +140,6 @@ MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
return new PEI();
}
-STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
@@ -168,10 +165,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
unsigned &, unsigned &, const MBBVector &,
const MBBVector &) {};
- ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
+ ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
} else {
SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
- ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
+ ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
UsesCalleeSaves = true;
}
}
@@ -222,7 +219,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
- ScavengeFrameVirtualRegs(Fn, RS);
+ ScavengeFrameVirtualRegs(Fn, *RS);
// Clear any vregs created by virtual scavenging.
Fn.getRegInfo().clearVirtRegs();
@@ -1153,92 +1150,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
RS->forward(MI);
}
}
-
-/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers
-/// with physical registers. Use the register scavenger to find an
-/// appropriate register to use.
-///
-/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
-/// iterate over the vreg use list, which at this point only contains machine
-/// operands for which eliminateFrameIndex need a new scratch reg.
-static void
-doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
- // Run through the instructions and find any virtual registers.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF) {
- RS->enterBasicBlock(MBB);
-
- int SPAdj = 0;
-
- // The instruction stream may change in the loop, so check MBB.end()
- // directly.
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- // We might end up here again with a NULL iterator if we scavenged a
- // register for which we inserted spill code for definition by what was
- // originally the first instruction in MBB.
- if (I == MachineBasicBlock::iterator(nullptr))
- I = MBB.begin();
-
- const MachineInstr &MI = *I;
- MachineBasicBlock::iterator J = std::next(I);
- MachineBasicBlock::iterator P =
- I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
- : std::prev(I);
-
- // RS should process this instruction before we might scavenge at this
- // location. This is because we might be replacing a virtual register
- // defined by this instruction, and if so, registers killed by this
- // instruction are available, and defined registers are not.
- RS->forward(I);
-
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
-
- ++NumScavengedRegs;
-
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
-
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS->setRegUsed(ScratchReg);
- }
-
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS->getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS->unprocess(P);
- } else
- ++I;
- }
- }
-
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-}
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index a87fed3a687e..24be7ea98d82 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -58,8 +58,9 @@ namespace {
/// whenever a register is unavailable. This is not practical in production but
/// provides a useful baseline both for measuring other allocators and comparing
/// the speed of the basic algorithm against other styles of allocators.
-class RABasic : public MachineFunctionPass, public RegAllocBase
-{
+class RABasic : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
// context
MachineFunction *MF;
@@ -72,6 +73,9 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
// selectOrSplit().
BitVector UsableRegs;
+ bool LRE_CanEraseVirtReg(unsigned) override;
+ void LRE_WillShrinkVirtReg(unsigned) override;
+
public:
RABasic();
@@ -121,17 +125,46 @@ char RABasic::ID = 0;
} // end anonymous namespace
+char &llvm::RABasicID = RABasic::ID;
+
+INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
+ false)
+
+bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (VRM->hasPhys(VirtReg)) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
+
RABasic::RABasic(): MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -200,7 +233,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Matrix->unassign(Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
}
return true;
@@ -259,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 3b5964eef55e..b2dfef91add5 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -49,9 +49,11 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Math.h"
#include "llvm/CodeGen/PBQP/Solution.h"
#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
@@ -139,13 +141,13 @@ public:
}
private:
- typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
- typedef std::vector<const LiveInterval*> Node2LIMap;
- typedef std::vector<unsigned> AllowedSet;
- typedef std::vector<AllowedSet> AllowedSetMap;
- typedef std::pair<unsigned, unsigned> RegPair;
- typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::set<unsigned> RegSet;
+ using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
+ using Node2LIMap = std::vector<const LiveInterval *>;
+ using AllowedSet = std::vector<unsigned>;
+ using AllowedSetMap = std::vector<AllowedSet>;
+ using RegPair = std::pair<unsigned, unsigned>;
+ using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
+ using RegSet = std::set<unsigned>;
char *customPassID;
@@ -212,12 +214,12 @@ public:
/// @brief Add interference edges between overlapping vregs.
class Interference : public PBQPRAConstraint {
private:
- typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
- typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
- typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
- typedef DenseSet<IKey> DisjointAllowedRegsCache;
- typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
- typedef DenseSet<IEdgeKey> IEdgeCache;
+ using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
+ using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
+ using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
+ using DisjointAllowedRegsCache = DenseSet<IKey>;
+ using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
+ using IEdgeCache = DenseSet<IEdgeKey>;
bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
PBQPRAGraph::NodeId MId,
@@ -252,8 +254,8 @@ private:
// for the fast interference graph construction algorithm. The last is there
// to save us from looking up node ids via the VRegToNode map in the graph
// metadata.
- typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
- IntervalInfo;
+ using IntervalInfo =
+ std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
static SlotIndex getStartPoint(const IntervalInfo &I) {
return std::get<0>(I)->segments[std::get<1>(I)].start;
@@ -320,9 +322,10 @@ public:
// Cache known disjoint allowed registers pairs
DisjointAllowedRegsCache D;
- typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
- typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
- decltype(&lowestStartPoint)> IntervalQueue;
+ using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
+ using IntervalQueue =
+ std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)>;
IntervalSet Active(lowestEndPoint);
IntervalQueue Inactive(lowestStartPoint);
@@ -658,7 +661,6 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
SmallVectorImpl<unsigned> &NewIntervals,
MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM, Spiller &VRegSpiller) {
-
VRegsToAlloc.erase(VReg);
LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
nullptr, &DeadRemats);
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 0635e5c0a63c..1aed58c36e17 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -15,18 +15,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/RegisterScavenging.h"
+
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -39,6 +44,8 @@ using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
LiveUnits.addRegMasked(Reg, LaneMask);
}
@@ -469,3 +476,120 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
+
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+ // operands for which eliminateFrameIndex need a new scratch reg.
+
+ // Run through the instructions and find any virtual registers.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ RS.enterBasicBlock(MBB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check MBB.end()
+ // directly.
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ // We might end up here again with a NULL iterator if we scavenged a
+ // register for which we inserted spill code for definition by what was
+ // originally the first instruction in MBB.
+ if (I == MachineBasicBlock::iterator(nullptr))
+ I = MBB.begin();
+
+ const MachineInstr &MI = *I;
+ MachineBasicBlock::iterator J = std::next(I);
+ MachineBasicBlock::iterator P =
+ I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
+ : std::prev(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS.forward(I);
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MO.isDef() && "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert(ScratchReg && "Missing scratch register!");
+ MRI.replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS.setRegUsed(ScratchReg);
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != std::prev(J)) {
+ MBB.splice(J, &MBB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS.getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS.unprocess(P);
+ } else
+ ++I;
+ }
+ }
+
+ MRI.clearVirtRegs();
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+namespace {
+/// This class runs register scavenging independ of the PrologEpilogInserter.
+/// This is used in for testing.
+class ScavengerTest : public MachineFunctionPass {
+public:
+ static char ID;
+ ScavengerTest() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFL = *STI.getFrameLowering();
+
+ RegScavenger RS;
+ // Let's hope that calling those outside of PrologEpilogueInserter works
+ // well enough to initialize the scavenger with some emergency spillslots
+ // for the target.
+ BitVector SavedRegs;
+ TFL.determineCalleeSaves(MF, SavedRegs, &RS);
+ TFL.processFunctionBeforeFrameFinalized(MF, &RS);
+
+ // Let's scavenge the current function
+ scavengeFrameVirtualRegs(MF, RS);
+ return true;
+ }
+};
+char ScavengerTest::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(ScavengerTest, "scavenger-test",
+ "Scavenge virtual registers inside basic blocks", false, false)
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 8035ea80364b..3fdbd2459361 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,32 +12,54 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -90,11 +112,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
- RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
- TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
+ RemoveKillFlags(RemoveKillFlags),
UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction()->getContext()))),
- FirstDbgValue(nullptr) {
+ Type::getVoidTy(mf.getFunction()->getContext()))) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -126,7 +146,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
return V;
}
assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
- } while (1);
+ } while (true);
}
/// This is a wrapper around GetUnderlyingObjects and adds support for basic
@@ -563,7 +583,7 @@ void ScheduleDAGInstrs::initSUnits() {
// which is contained within a basic block.
SUnits.reserve(NumRegionInstrs);
- for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) {
+ for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
if (MI.isDebugValue())
continue;
@@ -606,13 +626,13 @@ void ScheduleDAGInstrs::initSUnits() {
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
/// Current total number of SUs in map.
- unsigned NumNodes;
+ unsigned NumNodes = 0;
/// 1 for loads, 0 for stores. (see comment in SUList)
unsigned TrueMemOrderLatency;
public:
- Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+ Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
/// To keep NumNodes up to date, insert() is used instead of
/// this operator w/ push_back().
@@ -630,7 +650,7 @@ public:
void inline clearList(ValueType V) {
iterator Itr = find(V);
if (Itr != end()) {
- assert (NumNodes >= Itr->second.size());
+ assert(NumNodes >= Itr->second.size());
NumNodes -= Itr->second.size();
Itr->second.clear();
@@ -646,7 +666,7 @@ public:
unsigned inline size() const { return NumNodes; }
/// Counts the number of SUs in this map after a reduction.
- void reComputeSize(void) {
+ void reComputeSize() {
NumNodes = 0;
for (auto &I : *this)
NumNodes += I.second.size();
@@ -676,7 +696,7 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
}
void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
+ assert(BarrierChain != nullptr);
for (auto &I : map) {
SUList &sus = I.second;
@@ -687,7 +707,7 @@ void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
}
void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
+ assert(BarrierChain != nullptr);
// Go through all lists of SUs.
for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
@@ -1028,7 +1048,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
// let the not yet seen SUs have a dependency to the removed SUs.
- assert (N <= NodeNums.size());
+ assert(N <= NodeNums.size());
SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
if (BarrierChain) {
// The aliasing and non-aliasing maps reduce independently of each
@@ -1156,6 +1176,7 @@ std::string ScheduleDAGInstrs::getDAGName() const {
//===----------------------------------------------------------------------===//
namespace llvm {
+
/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
SchedDFSResult &R;
@@ -1163,16 +1184,16 @@ class SchedDFSImpl {
/// Join DAG nodes into equivalence classes by their subtree.
IntEqClasses SubtreeClasses;
/// List PredSU, SuccSU pairs that represent data edges between subtrees.
- std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+ std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
struct RootData {
unsigned NodeID;
unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
- unsigned SubInstrCount; ///< Instr count in this tree only, not children.
+ unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
+ /// children.
RootData(unsigned id): NodeID(id),
- ParentNodeID(SchedDFSResult::InvalidSubtreeID),
- SubInstrCount(0) {}
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
unsigned getSparseSetIndex() const { return NodeID; }
};
@@ -1340,12 +1361,15 @@ protected:
} while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
+
} // end namespace llvm
namespace {
+
/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
- std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+ std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
+
public:
bool isComplete() const { return DFSStack.empty(); }
@@ -1367,7 +1391,8 @@ public:
return getCurr()->Preds.end();
}
};
-} // anonymous
+
+} // end anonymous namespace
static bool hasDataSucc(const SUnit *SU) {
for (const SDep &SuccDep : SU->Succs) {
@@ -1392,7 +1417,7 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
SchedDAGReverseDFS DFS;
Impl.visitPreorder(&SU);
DFS.follow(&SU);
- for (;;) {
+ while (true) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
const SDep &PredDep = *DFS.getPred();
@@ -1457,4 +1482,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
}
} // end namespace llvm
+
#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5f167f8de1cf..9355dbe77f94 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -225,6 +225,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
return TranslateLegalizeResults(Op, Lowered);
}
+ LLVM_FALLTHROUGH;
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandLoad(Op));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 177898e1e950..80a03ea4eea0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,29 +11,46 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/IR/CallingConv.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -41,16 +58,20 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <set>
+#include <string>
#include <utility>
+#include <vector>
using namespace llvm;
@@ -269,7 +290,6 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
return ISD::CondCode(Operation);
}
-
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if the result is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
@@ -338,7 +358,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
//===----------------------------------------------------------------------===//
/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
-///
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
ID.AddInteger(OpC);
}
@@ -350,7 +369,6 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
-///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDValue> Ops) {
for (auto& Op : Ops) {
@@ -360,7 +378,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
-///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDUse> Ops) {
for (auto& Op : Ops) {
@@ -392,10 +409,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
case ISD::TargetConstantFP:
- case ISD::ConstantFP: {
+ case ISD::ConstantFP:
ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
break;
- }
case ISD::TargetGlobalAddress:
case ISD::GlobalAddress:
case ISD::TargetGlobalTLSAddress:
@@ -770,7 +786,6 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
/// maps and modified in place. Add it back to the CSE maps, unless an identical
/// node already exists, in which case transfer all its users to the existing
/// node. This transfer can potentially trigger recursive merging.
-///
void
SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
@@ -835,7 +850,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
return Node;
}
-
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
@@ -864,10 +878,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL),
+ : TM(tm), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
- UpdateListeners(nullptr) {
+ Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -1038,7 +1051,6 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
}
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
-///
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
@@ -1317,7 +1329,6 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
return SDValue(N, 0);
}
-
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
@@ -1451,7 +1462,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
- assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
@@ -2918,7 +2929,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
else
DemandedRHS.setBit((unsigned)M % NumElts);
}
- Tmp = UINT_MAX;
+ Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedLHS)
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
if (!!DemandedRHS) {
@@ -3122,7 +3133,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then get its sign bits.
- Tmp = UINT_MAX;
+ Tmp = std::numeric_limits<unsigned>::max();
if (DemandedElts[EltIdx]) {
// TODO - handle implicit truncation of inserted elements.
if (InVal.getScalarValueSizeInBits() != VTBits)
@@ -3188,7 +3199,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::CONCAT_VECTORS:
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
- Tmp = UINT_MAX;
+ Tmp = std::numeric_limits<unsigned>::max();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
@@ -3327,7 +3338,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
- llvm::SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
assert(llvm::all_of(Ops,
[Ops](SDValue Op) {
@@ -3836,8 +3847,9 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
return true;
return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
- any_of(Divisor->op_values(),
- [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+ llvm::any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() ||
+ isNullConstant(V); });
// TODO: Handle signed overflow.
}
// TODO: Handle oversized shifts.
@@ -3948,8 +3960,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
- if (!all_of(Ops, IsConstantBuildVectorOrUndef) ||
- !all_of(Ops, IsScalarOrSameVectorSize))
+ if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ !llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be a i1 boolean
@@ -5550,7 +5562,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
Opcode == ISD::PREFETCH ||
Opcode == ISD::LIFETIME_START ||
Opcode == ISD::LIFETIME_END ||
- (Opcode <= INT_MAX &&
+ ((int)Opcode <= std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -5884,7 +5896,6 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
-
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
@@ -6038,13 +6049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
switch (Opcode) {
default: break;
- case ISD::CONCAT_VECTORS: {
+ case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
- }
- case ISD::SELECT_CC: {
+ case ISD::SELECT_CC:
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
"LHS and RHS of condition must have same type!");
@@ -6053,14 +6063,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Ops[2].getValueType() == VT &&
"select_cc node must be of same type as true and false value!");
break;
- }
- case ISD::BR_CC: {
+ case ISD::BR_CC:
assert(NumOps == 5 && "BR_CC takes 5 operands!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
}
- }
// Memoize nodes.
SDNode *N;
@@ -6599,7 +6607,6 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
return Res;
}
-
/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
///
@@ -6812,7 +6819,7 @@ public:
: SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
-}
+} // end anonymous namespace
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
@@ -6858,7 +6865,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
AddModifiedNodeToCSEMaps(User);
}
-
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
setRoot(To);
@@ -7028,6 +7034,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
}
namespace {
+
/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
/// to record information about a use.
struct UseMemo {
@@ -7040,7 +7047,8 @@ namespace {
bool operator<(const UseMemo &L, const UseMemo &R) {
return (intptr_t)L.User < (intptr_t)R.User;
}
-}
+
+} // end anonymous namespace
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
@@ -7106,7 +7114,6 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
/// based on their topological order. It returns the maximum id and a vector
/// of the SDNodes* in assigned order by reference.
unsigned SelectionDAG::AssignTopologicalOrder() {
-
unsigned DAGSize = 0;
// SortedPos tracks the progress of the algorithm. Nodes before it are
@@ -7333,6 +7340,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
}
namespace {
+
struct EVTArray {
std::vector<EVT> VTs;
@@ -7342,11 +7350,12 @@ namespace {
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
-}
-static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+} // end anonymous namespace
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
static ManagedStatic<EVTArray> SimpleVTArray;
-static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+static ManagedStatic<sys::SmartMutex<true>> VTMutex;
/// getValueTypeList - Return a pointer to the specified value type.
///
@@ -7380,7 +7389,6 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
return NUses == 0;
}
-
/// hasAnyUseOfValue - Return true if there are any use of the indicated
/// value. This method ignores uses of other values defined by this operation.
bool SDNode::hasAnyUseOfValue(unsigned Value) const {
@@ -7393,9 +7401,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
return false;
}
-
/// isOnlyUserOf - Return true if this node is the only use of N.
-///
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
@@ -7425,7 +7431,6 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
}
/// isOperand - Return true if this node is an operand of N.
-///
bool SDValue::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (*this == Op)
@@ -7475,7 +7480,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
}
// Next, try a deep search: check whether every operand of the TokenFactor
// reaches Dest.
- return all_of((*this)->ops(), [=](SDValue Op) {
+ return llvm::all_of((*this)->ops(), [=](SDValue Op) {
return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
});
}
@@ -7627,7 +7632,6 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
}
-
/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
@@ -7718,7 +7722,6 @@ unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
}
-
Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 687b882c5e4d..b5ccd64ee76c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2022,7 +2022,7 @@ static SDNode *findGlueUse(SDNode *N) {
}
/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
-/// This function recursively traverses up the operand chain, ignoring
+/// This function iteratively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
@@ -2035,30 +2035,36 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
// happen because we scan down to newly selected nodes in the case of glue
// uses.
- if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
- return false;
+ std::vector<SDNode *> WorkList;
+ WorkList.push_back(Use);
- // Don't revisit nodes if we already scanned it and didn't fail, we know we
- // won't fail if we scan it again.
- if (!Visited.insert(Use).second)
- return false;
+ while (!WorkList.empty()) {
+ Use = WorkList.back();
+ WorkList.pop_back();
+ if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
+ continue;
- for (const SDValue &Op : Use->op_values()) {
- // Ignore chain uses, they are validated by HandleMergeInputChains.
- if (Op.getValueType() == MVT::Other && IgnoreChains)
+ // Don't revisit nodes if we already scanned it and didn't fail, we know we
+ // won't fail if we scan it again.
+ if (!Visited.insert(Use).second)
continue;
- SDNode *N = Op.getNode();
- if (N == Def) {
- if (Use == ImmedUse || Use == Root)
- continue; // We are not looking for immediate use.
- assert(N != Root);
- return true;
- }
+ for (const SDValue &Op : Use->op_values()) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Op.getValueType() == MVT::Other && IgnoreChains)
+ continue;
- // Traverse up the operand chain.
- if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
- return true;
+ SDNode *N = Op.getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ WorkList.push_back(N);
+ }
}
return false;
}
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 1c66649cae01..eed667dbe7e0 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -818,7 +818,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
SI.GCTransitionArgs =
ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
SI.ID = ISP.getID();
- SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end());
+ SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end());
SI.StatepointFlags = ISP.getFlags();
SI.NumPatchBytes = ISP.getNumPatchBytes();
SI.EHPadBB = EHPadBB;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0dffffee9976..adb2b188265b 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1493,8 +1493,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- // Ensure that the constant occurs on the RHS, and fold constant
- // comparisons.
+ // Ensure that the constant occurs on the RHS and fold constant comparisons.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isa<ConstantSDNode>(N0.getNode()) &&
(DCI.isBeforeLegalizeOps() ||
@@ -1638,14 +1637,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
-
}
}
}
- // If the LHS is '(and load, const)', the RHS is 0,
- // the test is for equality or unsigned, and all 1 bits of the const are
- // in the same partial word, see if we can shorten the load.
+ // If the LHS is '(and load, const)', the RHS is 0, the test is for
+ // equality or unsigned, and all 1 bits of the const are in the same
+ // partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
!ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
@@ -1669,10 +1667,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if ((newMask & Mask) == Mask) {
- if (!DAG.getDataLayout().isLittleEndian())
- bestOffset = (origWidth/width - offset - 1) * (width/8);
- else
+ if (DAG.getDataLayout().isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
+ else
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
bestMask = Mask.lshr(offset * (width/8) * 8);
bestWidth = width;
break;
@@ -1713,10 +1711,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (Cond) {
case ISD::SETUGT:
case ISD::SETUGE:
- case ISD::SETEQ: return DAG.getConstant(0, dl, VT);
+ case ISD::SETEQ:
+ return DAG.getConstant(0, dl, VT);
case ISD::SETULT:
case ISD::SETULE:
- case ISD::SETNE: return DAG.getConstant(1, dl, VT);
+ case ISD::SETNE:
+ return DAG.getConstant(1, dl, VT);
case ISD::SETGT:
case ISD::SETGE:
// True if the sign bit of C1 is set.
@@ -1816,9 +1816,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
BitWidth-1))) {
// Okay, get the un-inverted input value.
SDValue Val;
- if (N0.getOpcode() == ISD::XOR)
+ if (N0.getOpcode() == ISD::XOR) {
Val = N0.getOperand(0);
- else {
+ } else {
assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
@@ -1883,7 +1883,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
- if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true
+ // X >= MIN --> true
+ if (C1 == MinVal)
+ return DAG.getConstant(1, dl, VT);
+
// X >= C0 --> X > (C0 - 1)
APInt C = C1 - 1;
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
@@ -1898,7 +1901,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
- if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true
+ // X <= MAX --> true
+ if (C1 == MaxVal)
+ return DAG.getConstant(1, dl, VT);
+
// X <= C0 --> X < (C0 + 1)
APInt C = C1 + 1;
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 900c0318b179..c43a5e18ad23 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1456,6 +1456,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
+ LLVM_FALLTHROUGH;
}
case TypeWidenVector: {
// Try to widen the vector.
@@ -1473,6 +1474,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
+ LLVM_FALLTHROUGH;
}
case TypeSplitVector:
case TypeScalarizeVector: {
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 282e3103adc9..711144fc2faa 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -27,6 +27,14 @@ Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) {
Error CodeViewRecordIO::endRecord() {
assert(!Limits.empty() && "Not in a record!");
Limits.pop_back();
+ // We would like to assert that we actually read / wrote all the bytes that we
+ // expected to for this record, but unfortunately we can't do this. Some
+ // producers such as MASM over-allocate for certain types of records and
+ // commit the extraneous data, so when reading we can't be sure every byte
+ // will have been read. And when writing we over-allocate temporarily since
+ // we don't know how big the record is until we're finished writing it, so
+ // even though we don't commit the extraneous data, we still can't guarantee
+ // we're at the end of the allocated data.
return Error::success();
}
@@ -49,6 +57,12 @@ uint32_t CodeViewRecordIO::maxFieldLength() const {
return *Min;
}
+Error CodeViewRecordIO::padToAlignment(uint32_t Align) {
+ if (isReading())
+ return Reader->padToAlignment(Align);
+ return Writer->padToAlignment(Align);
+}
+
Error CodeViewRecordIO::skipPadding() {
assert(!isWriting() && "Cannot skip padding while writing!");
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index b8741eb0b675..2e72242181b0 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -72,7 +72,7 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
uint32_t DebugStringTableSubsection::size() const { return Strings.size(); }
uint32_t DebugStringTableSubsection::getStringId(StringRef S) const {
- auto P = Strings.find(S);
- assert(P != Strings.end());
- return P->second;
+ auto Iter = Strings.find(S);
+ assert(Iter != Strings.end());
+ return Iter->second;
}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index 511f36d0020a..cfd1c5d3ab0c 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -16,14 +16,17 @@ using namespace llvm;
using namespace llvm::codeview;
DebugSubsectionRecord::DebugSubsectionRecord()
- : Kind(DebugSubsectionKind::None) {}
+ : Container(CodeViewContainer::ObjectFile),
+ Kind(DebugSubsectionKind::None) {}
DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind,
- BinaryStreamRef Data)
- : Kind(Kind), Data(Data) {}
+ BinaryStreamRef Data,
+ CodeViewContainer Container)
+ : Container(Container), Kind(Kind), Data(Data) {}
Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
- DebugSubsectionRecord &Info) {
+ DebugSubsectionRecord &Info,
+ CodeViewContainer Container) {
const DebugSubsectionHeader *Header;
BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readObject(Header))
@@ -41,13 +44,14 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
}
if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
return EC;
+ Info.Container = Container;
Info.Kind = Kind;
return Error::success();
}
uint32_t DebugSubsectionRecord::getRecordLength() const {
uint32_t Result = sizeof(DebugSubsectionHeader) + Data.getLength();
- assert(Result % 4 == 0);
+ assert(Result % alignOf(Container) == 0);
return Result;
}
@@ -56,25 +60,29 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; }
BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; }
DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
- DebugSubsectionKind Kind, DebugSubsection &Frag)
- : Kind(Kind), Frag(Frag) {}
+ std::unique_ptr<DebugSubsection> Subsection, CodeViewContainer Container)
+ : Subsection(std::move(Subsection)), Container(Container) {}
uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() {
- uint32_t Size = sizeof(DebugSubsectionHeader) +
- alignTo(Frag.calculateSerializedSize(), 4);
+ uint32_t Size =
+ sizeof(DebugSubsectionHeader) +
+ alignTo(Subsection->calculateSerializedSize(), alignOf(Container));
return Size;
}
Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) {
+ assert(Writer.getOffset() % alignOf(Container) == 0 &&
+ "Debug Subsection not properly aligned");
+
DebugSubsectionHeader Header;
- Header.Kind = uint32_t(Kind);
+ Header.Kind = uint32_t(Subsection->kind());
Header.Length = calculateSerializedLength() - sizeof(DebugSubsectionHeader);
if (auto EC = Writer.writeObject(Header))
return EC;
- if (auto EC = Frag.commit(Writer))
+ if (auto EC = Subsection->commit(Writer))
return EC;
- if (auto EC = Writer.padToAlignment(4))
+ if (auto EC = Writer.padToAlignment(alignOf(Container)))
return EC;
return Error::success();
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 3d49a7198d1a..66045933ce9b 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -668,7 +668,7 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
SymbolVisitorCallbackPipeline Pipeline;
- SymbolDeserializer Deserializer(ObjDelegate.get());
+ SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
@@ -679,7 +679,7 @@ Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
SymbolVisitorCallbackPipeline Pipeline;
- SymbolDeserializer Deserializer(ObjDelegate.get());
+ SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index bb1731465495..ea46841a70f6 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -40,6 +40,7 @@ Error SymbolRecordMapping::visitSymbolBegin(CVSymbol &Record) {
}
Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) {
+ error(IO.padToAlignment(alignOf(Container)));
error(IO.endRecord());
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 251cc431f52b..9f2d619d1a1c 100644
--- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -12,9 +12,11 @@
using namespace llvm;
using namespace llvm::codeview;
-SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator)
- : Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little),
- Writer(Stream), Mapping(Writer) { }
+SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container)
+ : Storage(Allocator), RecordBuffer(MaxRecordLength),
+ Stream(RecordBuffer, llvm::support::little), Writer(Stream),
+ Mapping(Writer, Container) {}
Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!");
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index dfdeb8414212..faf2442bc94b 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -47,42 +47,46 @@ static Interval intersect(const Interval &I1, const Interval &I2) {
MappedBlockStream::MappedBlockStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
- BinaryStreamRef MsfData)
- : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData) {}
-
-std::unique_ptr<MappedBlockStream>
-MappedBlockStream::createStream(uint32_t BlockSize,
- const MSFStreamLayout &Layout,
- BinaryStreamRef MsfData) {
+ BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator)
+ : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData),
+ Allocator(Allocator) {}
+
+std::unique_ptr<MappedBlockStream> MappedBlockStream::createStream(
+ uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
- BlockSize, Layout, MsfData);
+ BlockSize, Layout, MsfData, Allocator);
}
std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream(
- const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex) {
+ const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex,
+ BumpPtrAllocator &Allocator) {
assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
- Layout.SB->BlockSize, SL, MsfData);
+ Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createDirectoryStream(const MSFLayout &Layout,
- BinaryStreamRef MsfData) {
+ BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createFpmStream(const MSFLayout &Layout,
- BinaryStreamRef MsfData) {
+ BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
@@ -148,7 +152,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
// into it, and return an ArrayRef to that. Do not touch existing pool
// allocations, as existing clients may be holding a pointer which must
// not be invalidated.
- uint8_t *WriteBuffer = static_cast<uint8_t *>(Pool.Allocate(Size, 8));
+ uint8_t *WriteBuffer = static_cast<uint8_t *>(Allocator.Allocate(Size, 8));
if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size)))
return EC;
@@ -269,10 +273,6 @@ Error MappedBlockStream::readBytes(uint32_t Offset,
return Error::success();
}
-uint32_t MappedBlockStream::getNumBytesCopied() const {
- return static_cast<uint32_t>(Pool.getBytesAllocated());
-}
-
void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); }
void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
@@ -313,43 +313,48 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
WritableMappedBlockStream::WritableMappedBlockStream(
uint32_t BlockSize, const MSFStreamLayout &Layout,
- WritableBinaryStreamRef MsfData)
- : ReadInterface(BlockSize, Layout, MsfData), WriteInterface(MsfData) {}
+ WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator)
+ : ReadInterface(BlockSize, Layout, MsfData, Allocator),
+ WriteInterface(MsfData) {}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
- WritableBinaryStreamRef MsfData) {
+ WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
- BlockSize, Layout, MsfData);
+ BlockSize, Layout, MsfData, Allocator);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout,
WritableBinaryStreamRef MsfData,
- uint32_t StreamIndex) {
+ uint32_t StreamIndex,
+ BumpPtrAllocator &Allocator) {
assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createDirectoryStream(
- const MSFLayout &Layout, WritableBinaryStreamRef MsfData) {
+ const MSFLayout &Layout, WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
- WritableBinaryStreamRef MsfData) {
+ WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index b28ec2ff33ac..22c2ef31bd71 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -66,7 +66,11 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) {
void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
Symbols.push_back(Symbol);
- SymbolByteSize += Symbol.data().size();
+ // Symbols written to a PDB file are required to be 4 byte aligned. The same
+ // is not true of object files.
+ assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ "Invalid Symbol alignment!");
+ SymbolByteSize += Symbol.length();
}
void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
@@ -140,7 +144,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
if (Layout.ModDiStream != kInvalidStreamIndex) {
auto NS = WritableMappedBlockStream::createIndexedStream(
- MsfLayout, MsfBuffer, Layout.ModDiStream);
+ MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator());
WritableBinaryStreamRef Ref(*NS);
BinaryStreamWriter SymbolWriter(Ref);
// Write the symbols.
@@ -153,7 +157,8 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
return EC;
// TODO: Write C11 Line data
-
+ assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ "Invalid debug section alignment!");
for (const auto &Builder : C13Builders) {
assert(Builder && "Empty C13 Fragment Builder!");
if (auto EC = Builder->commit(SymbolWriter))
@@ -169,42 +174,9 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
return Error::success();
}
-void DbiModuleDescriptorBuilder::addC13Fragment(
- std::unique_ptr<DebugLinesSubsection> Lines) {
- DebugLinesSubsection &Frag = *Lines;
-
- // File Checksums have to come first, so push an empty entry on if this
- // is the first.
- if (C13Builders.empty())
- C13Builders.push_back(nullptr);
-
- this->LineInfo.push_back(std::move(Lines));
- C13Builders.push_back(
- llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag));
-}
-
-void DbiModuleDescriptorBuilder::addC13Fragment(
- std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees) {
- DebugInlineeLinesSubsection &Frag = *Inlinees;
-
- // File Checksums have to come first, so push an empty entry on if this
- // is the first.
- if (C13Builders.empty())
- C13Builders.push_back(nullptr);
-
- this->Inlinees.push_back(std::move(Inlinees));
- C13Builders.push_back(
- llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag));
-}
-
-void DbiModuleDescriptorBuilder::setC13FileChecksums(
- std::unique_ptr<DebugChecksumsSubsection> Checksums) {
- assert(!ChecksumInfo && "Can't have more than one checksum info!");
-
- if (C13Builders.empty())
- C13Builders.push_back(nullptr);
-
- ChecksumInfo = std::move(Checksums);
- C13Builders[0] = llvm::make_unique<DebugSubsectionRecordBuilder>(
- ChecksumInfo->kind(), *ChecksumInfo);
+void DbiModuleDescriptorBuilder::addDebugSubsection(
+ std::unique_ptr<DebugSubsection> Subsection) {
+ assert(Subsection);
+ C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
+ std::move(Subsection), CodeViewContainer::Pdb));
}
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 2f4fb6cc295d..320b11dc5cab 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -252,7 +252,7 @@ Error DbiStream::initializeSectionHeadersData() {
return make_error<RawError>(raw_error_code::no_stream);
auto SHS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum);
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator());
size_t StreamLen = SHS->getLength();
if (StreamLen % sizeof(object::coff_section))
@@ -284,7 +284,7 @@ Error DbiStream::initializeFpoRecords() {
return make_error<RawError>(raw_error_code::no_stream);
auto FS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum);
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator());
size_t StreamLen = FS->getLength();
if (StreamLen % sizeof(object::FpoData))
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 23c7456d7772..55c20fdb9af6 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -357,8 +357,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = finalize())
return EC;
- auto DbiS = WritableMappedBlockStream::createIndexedStream(Layout, MsfBuffer,
- StreamDBI);
+ auto DbiS = WritableMappedBlockStream::createIndexedStream(
+ Layout, MsfBuffer, StreamDBI, Allocator);
BinaryStreamWriter Writer(*DbiS);
if (auto EC = Writer.writeObject(*Header))
@@ -396,7 +396,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (Stream.StreamNumber == kInvalidStreamIndex)
continue;
auto WritableStream = WritableMappedBlockStream::createIndexedStream(
- Layout, MsfBuffer, Stream.StreamNumber);
+ Layout, MsfBuffer, Stream.StreamNumber, Allocator);
BinaryStreamWriter DbgStreamWriter(*WritableStream);
if (auto EC = DbgStreamWriter.writeArray(Stream.Data))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index f019d410328a..707128f7efd4 100644
--- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -50,8 +50,8 @@ Error InfoStreamBuilder::finalizeMsfLayout() {
Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
WritableBinaryStreamRef Buffer) const {
- auto InfoS =
- WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB);
+ auto InfoS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, StreamPDB, Msf.getAllocator());
BinaryStreamWriter Writer(*InfoS);
InfoStreamHeader H;
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index d7a203746a0d..c4ff30011a17 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -55,9 +55,9 @@ Error ModuleDebugStreamRef::reload() {
if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size))
return EC;
- BinaryStreamReader LineReader(C13LinesSubstream);
- if (auto EC =
- LineReader.readArray(LinesAndChecksums, LineReader.bytesRemaining()))
+ BinaryStreamReader SubsectionsReader(C13LinesSubstream);
+ if (auto EC = SubsectionsReader.readArray(Subsections,
+ SubsectionsReader.bytesRemaining()))
return EC;
uint32_t GlobalRefsSize;
@@ -77,13 +77,27 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end());
}
-llvm::iterator_range<ModuleDebugStreamRef::LinesAndChecksumsIterator>
-ModuleDebugStreamRef::linesAndChecksums() const {
- return make_range(LinesAndChecksums.begin(), LinesAndChecksums.end());
+llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
+ModuleDebugStreamRef::subsections() const {
+ return make_range(Subsections.begin(), Subsections.end());
}
-bool ModuleDebugStreamRef::hasLineInfo() const {
+bool ModuleDebugStreamRef::hasDebugSubsections() const {
return C13LinesSubstream.getLength() > 0;
}
Error ModuleDebugStreamRef::commit() { return Error::success(); }
+
+Expected<codeview::DebugChecksumsSubsectionRef>
+ModuleDebugStreamRef::findChecksumsSubsection() const {
+ for (const auto &SS : subsections()) {
+ if (SS.kind() != DebugSubsectionKind::FileChecksums)
+ continue;
+
+ codeview::DebugChecksumsSubsectionRef Result;
+ if (auto EC = Result.initialize(SS.getRecordData()))
+ return std::move(EC);
+ return Result;
+ }
+ return make_error<RawError>(raw_error_code::no_entry);
+}
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 859295d2c7d3..1254e23c73eb 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -146,7 +146,8 @@ Error PDBFile::parseFileHeaders() {
// at getBlockSize() intervals, so we have to be compatible.
// See the function fpmPn() for more information:
// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
- auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer);
+ auto FpmStream =
+ MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
BinaryStreamReader FpmReader(*FpmStream);
ArrayRef<uint8_t> FpmBytes;
if (auto EC = FpmReader.readBytes(FpmBytes,
@@ -184,7 +185,8 @@ Error PDBFile::parseStreamData() {
// is exactly what we are attempting to parse. By specifying a custom
// subclass of IPDBStreamData which only accesses the fields that have already
// been parsed, we can avoid this and reuse MappedBlockStream.
- auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer);
+ auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
+ Allocator);
BinaryStreamReader Reader(*DS);
if (auto EC = Reader.readInteger(NumStreams))
return EC;
@@ -407,5 +409,6 @@ PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
uint32_t StreamIndex) const {
if (StreamIndex >= getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
- return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);
+ return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
+ Allocator);
}
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index c6568029ec55..2c6465e6fb2a 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -140,8 +140,8 @@ Error PDBFileBuilder::commit(StringRef Filename) {
if (auto EC = Writer.writeArray(Layout.DirectoryBlocks))
return EC;
- auto DirStream =
- WritableMappedBlockStream::createDirectoryStream(Layout, Buffer);
+ auto DirStream = WritableMappedBlockStream::createDirectoryStream(
+ Layout, Buffer, Allocator);
BinaryStreamWriter DW(*DirStream);
if (auto EC = DW.writeInteger<uint32_t>(Layout.StreamSizes.size()))
return EC;
@@ -158,8 +158,8 @@ Error PDBFileBuilder::commit(StringRef Filename) {
if (!ExpectedSN)
return ExpectedSN.takeError();
- auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
- *ExpectedSN);
+ auto NS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, *ExpectedSN, Allocator);
BinaryStreamWriter NSWriter(*NS);
if (auto EC = Strings.commit(NSWriter))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index e84573fe07b8..6013c342cf02 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -56,6 +56,10 @@ Error PDBStringTable::readStrings(BinaryStreamReader &Reader) {
return Error::success();
}
+codeview::DebugStringTableSubsectionRef PDBStringTable::getStringTable() const {
+ return Strings;
+}
+
Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) {
const support::ulittle32_t *HashCount;
if (auto EC = Reader.readObject(HashCount))
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 623afb371b50..67c803d3124e 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -32,8 +32,7 @@ using namespace llvm::support;
using namespace llvm::msf;
using namespace llvm::pdb;
-TpiStream::TpiStream(const PDBFile &File,
- std::unique_ptr<MappedBlockStream> Stream)
+TpiStream::TpiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream)
: Pdb(File), Stream(std::move(Stream)) {}
TpiStream::~TpiStream() = default;
@@ -77,7 +76,8 @@ Error TpiStream::reload() {
"Invalid TPI hash stream index.");
auto HS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex);
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex,
+ Pdb.getAllocator());
BinaryStreamReader HSR(*HS);
// There should be a hash value for every type record, or no hashes at all.
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 20456cc97823..9e943c7f114d 100644
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -147,8 +147,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = finalize())
return EC;
- auto InfoS =
- WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx);
+ auto InfoS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
+ Idx, Allocator);
BinaryStreamWriter Writer(*InfoS);
if (auto EC = Writer.writeObject(*Header))
@@ -159,8 +159,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
return EC;
if (HashStreamIndex != kInvalidStreamIndex) {
- auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
- HashStreamIndex);
+ auto HVS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, HashStreamIndex, Allocator);
BinaryStreamWriter HW(*HVS);
if (HashValueStream) {
if (auto EC = HW.writeStreamRef(*HashValueStream))
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 7e6f9a7804b9..7754ac03b43d 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -39,6 +39,21 @@ void DIBuilder::trackIfUnresolved(MDNode *N) {
UnresolvedNodes.emplace_back(N);
}
+void DIBuilder::finalizeSubprogram(DISubprogram *SP) {
+ MDTuple *Temp = SP->getVariables().get();
+ if (!Temp || !Temp->isTemporary())
+ return;
+
+ SmallVector<Metadata *, 4> Variables;
+
+ auto PV = PreservedVariables.find(SP);
+ if (PV != PreservedVariables.end())
+ Variables.append(PV->second.begin(), PV->second.end());
+
+ DINodeArray AV = getOrCreateArray(Variables);
+ TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
+}
+
void DIBuilder::finalize() {
if (!CUNode) {
assert(!AllowUnresolvedNodes &&
@@ -62,25 +77,11 @@ void DIBuilder::finalize() {
CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues));
DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
- auto resolveVariables = [&](DISubprogram *SP) {
- MDTuple *Temp = SP->getVariables().get();
- if (!Temp)
- return;
-
- SmallVector<Metadata *, 4> Variables;
-
- auto PV = PreservedVariables.find(SP);
- if (PV != PreservedVariables.end())
- Variables.append(PV->second.begin(), PV->second.end());
-
- DINodeArray AV = getOrCreateArray(Variables);
- TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
- };
for (auto *SP : SPs)
- resolveVariables(SP);
+ finalizeSubprogram(SP);
for (auto *N : RetainValues)
if (auto *SP = dyn_cast<DISubprogram>(N))
- resolveVariables(SP);
+ finalizeSubprogram(SP);
if (!AllGVs.empty())
CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs));
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index b7e3f0c6779e..0485fece7c42 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -99,87 +99,6 @@ DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt,
return Last;
}
-/// Reparent \c Scope from \c OrigSP to \c NewSP.
-static DIScope *reparentScope(LLVMContext &Ctx, DIScope *Scope,
- DISubprogram *OrigSP, DISubprogram *NewSP,
- DenseMap<const MDNode *, MDNode *> &Cache) {
- SmallVector<DIScope *, 3> ScopeChain;
- DIScope *Last = NewSP;
- DIScope *CurScope = Scope;
- do {
- if (auto *SP = dyn_cast<DISubprogram>(CurScope)) {
- // Don't rewrite this scope chain if it doesn't lead to the replaced SP.
- if (SP != OrigSP)
- return Scope;
- Cache.insert({OrigSP, NewSP});
- break;
- }
- if (auto *Found = Cache[CurScope]) {
- Last = cast<DIScope>(Found);
- break;
- }
- ScopeChain.push_back(CurScope);
- } while ((CurScope = CurScope->getScope().resolve()));
-
- // Starting from the top, rebuild the nodes to point to the new inlined-at
- // location (then rebuilding the rest of the chain behind it) and update the
- // map of already-constructed inlined-at nodes.
- for (const DIScope *MD : reverse(ScopeChain)) {
- if (auto *LB = dyn_cast<DILexicalBlock>(MD))
- Cache[MD] = Last = DILexicalBlock::getDistinct(
- Ctx, Last, LB->getFile(), LB->getLine(), LB->getColumn());
- else if (auto *LB = dyn_cast<DILexicalBlockFile>(MD))
- Cache[MD] = Last = DILexicalBlockFile::getDistinct(
- Ctx, Last, LB->getFile(), LB->getDiscriminator());
- else
- llvm_unreachable("illegal parent scope");
- }
- return Last;
-}
-
-void DebugLoc::reparentDebugInfo(Instruction &I, DISubprogram *OrigSP,
- DISubprogram *NewSP,
- DenseMap<const MDNode *, MDNode *> &Cache) {
- auto DL = I.getDebugLoc();
- if (!OrigSP || !NewSP || OrigSP == NewSP || !DL)
- return;
-
- // Reparent the debug location.
- auto &Ctx = I.getContext();
- DILocation *InlinedAt = DL->getInlinedAt();
- if (InlinedAt) {
- while (auto *IA = InlinedAt->getInlinedAt())
- InlinedAt = IA;
- auto NewScope =
- reparentScope(Ctx, InlinedAt->getScope(), OrigSP, NewSP, Cache);
- InlinedAt =
- DebugLoc::get(InlinedAt->getLine(), InlinedAt->getColumn(), NewScope);
- }
- I.setDebugLoc(
- DebugLoc::get(DL.getLine(), DL.getCol(),
- reparentScope(Ctx, DL->getScope(), OrigSP, NewSP, Cache),
- DebugLoc::appendInlinedAt(DL, InlinedAt, Ctx, Cache,
- ReplaceLastInlinedAt)));
-
- // Fix up debug variables to point to NewSP.
- auto reparentVar = [&](DILocalVariable *Var) {
- return DILocalVariable::get(
- Ctx,
- cast<DILocalScope>(
- reparentScope(Ctx, Var->getScope(), OrigSP, NewSP, Cache)),
- Var->getName(), Var->getFile(), Var->getLine(), Var->getType(),
- Var->getArg(), Var->getFlags(), Var->getAlignInBits());
- };
- if (auto *DbgValue = dyn_cast<DbgValueInst>(&I)) {
- auto *Var = DbgValue->getVariable();
- I.setOperand(2, MetadataAsValue::get(Ctx, reparentVar(Var)));
- } else if (auto *DbgDeclare = dyn_cast<DbgDeclareInst>(&I)) {
- auto *Var = DbgDeclare->getVariable();
- I.setOperand(1, MetadataAsValue::get(Ctx, reparentVar(Var)));
- }
-}
-
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void DebugLoc::dump() const {
if (!Loc)
diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp
index b670c817569a..a03a6fb62237 100644
--- a/lib/IR/OptBisect.cpp
+++ b/lib/IR/OptBisect.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/Pass.h"
@@ -53,13 +54,20 @@ static std::string getDescription(const BasicBlock &BB) {
}
static std::string getDescription(const Loop &L) {
- // FIXME: I'd like to be able to provide a better description here, but
- // calling L->getHeader() would introduce a new dependency on the
- // LLVMCore library.
+ // FIXME: Move into LoopInfo so we can get a better description
+ // (and avoid a circular dependency between IR and Analysis).
return "loop";
}
+static std::string getDescription(const Region &R) {
+ // FIXME: Move into RegionInfo so we can get a better description
+ // (and avoid a circular dependency between IR and Analysis).
+ return "region";
+}
+
static std::string getDescription(const CallGraphSCC &SCC) {
+ // FIXME: Move into CallGraphSCCPass to avoid circular dependency between
+ // IR and Analysis.
std::string Desc = "SCC (";
bool First = true;
for (CallGraphNode *CGN : SCC) {
@@ -83,6 +91,7 @@ template bool OptBisect::shouldRunPass(const Pass *, const Function &);
template bool OptBisect::shouldRunPass(const Pass *, const BasicBlock &);
template bool OptBisect::shouldRunPass(const Pass *, const Loop &);
template bool OptBisect::shouldRunPass(const Pass *, const CallGraphSCC &);
+template bool OptBisect::shouldRunPass(const Pass *, const Region &);
template <class UnitT>
bool OptBisect::shouldRunPass(const Pass *P, const UnitT &U) {
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 9efc095f9fcf..92145aaf667a 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -122,6 +122,7 @@ static void computeCacheKey(
AddUnsigned(Conf.CGOptLevel);
AddUnsigned(Conf.CGFileType);
AddUnsigned(Conf.OptLevel);
+ AddUnsigned(Conf.UseNewPM);
AddString(Conf.OptPipeline);
AddString(Conf.AAPipeline);
AddString(Conf.OverrideTriple);
@@ -621,6 +622,19 @@ unsigned LTO::getMaxTasks() const {
}
Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
+ // Compute "dead" symbols, we don't want to import/export these!
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (auto &Res : GlobalResolutions) {
+ if (Res.second.VisibleOutsideThinLTO &&
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to preserve any ThinLTO copies.
+ !Res.second.IRName.empty())
+ GUIDPreservedSymbols.insert(GlobalValue::getGUID(
+ GlobalValue::dropLLVMManglingEscape(Res.second.IRName)));
+ }
+
+ computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
+
// Save the status of having a regularLTO combined module, as
// this is needed for generating the ThinLTO Task ID, and
// the CombinedModule will be moved at the end of runRegularLTO.
@@ -930,6 +944,17 @@ ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix,
};
}
+static bool IsLiveByGUID(const ModuleSummaryIndex &Index,
+ GlobalValue::GUID GUID) {
+ auto VI = Index.getValueInfo(GUID);
+ if (!VI)
+ return false;
+ for (auto &I : VI.getSummaryList())
+ if (Index.isGlobalValueLive(I.get()))
+ return true;
+ return false;
+}
+
Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
bool HasRegularLTO) {
if (ThinLTO.ModuleMap.empty())
@@ -962,22 +987,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
if (Conf.OptLevel > 0) {
- // Compute "dead" symbols, we don't want to import/export these!
- DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
- for (auto &Res : GlobalResolutions) {
- if (Res.second.VisibleOutsideThinLTO &&
- // IRName will be defined if we have seen the prevailing copy of
- // this value. If not, no need to preserve any ThinLTO copies.
- !Res.second.IRName.empty())
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(
- GlobalValue::dropLLVMManglingEscape(Res.second.IRName)));
- }
-
- auto DeadSymbols =
- computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
-
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- ImportLists, ExportLists, &DeadSymbols);
+ ImportLists, ExportLists);
std::set<GlobalValue::GUID> ExportedGUIDs;
for (auto &Res : GlobalResolutions) {
@@ -992,7 +1003,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
auto GUID = GlobalValue::getGUID(
GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
// Mark exported unless index-based analysis determined it to be dead.
- if (!DeadSymbols.count(GUID))
+ if (IsLiveByGUID(ThinLTO.CombinedIndex, GUID))
ExportedGUIDs.insert(GUID);
}
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index f9c41f5c9744..3f72e446cdf2 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -42,11 +42,6 @@
using namespace llvm;
using namespace lto;
-static cl::opt<bool>
- LTOUseNewPM("lto-use-new-pm",
- cl::desc("Run LTO passes using the new pass manager"),
- cl::init(false), cl::Hidden);
-
LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
@@ -266,7 +261,7 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
if (!Conf.OptPipeline.empty())
runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
Conf.DisableVerify);
- else if (LTOUseNewPM)
+ else if (Conf.UseNewPM)
runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO);
else
runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index ca3fc60f9501..6b221a347c17 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -628,13 +628,13 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+ computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
// Resolve LinkOnce/Weak symbols.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
@@ -673,13 +673,13 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+ computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
@@ -750,13 +750,13 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+ computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
auto &ExportList = ExportLists[ModuleIdentifier];
// Be friendly and don't nuke totally the module when the client didn't
@@ -902,14 +902,14 @@ void ThinLTOCodeGenerator::run() {
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
+ computeDeadSymbols(*Index, GUIDPreservedSymbols);
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 8c3df36cfb48..9b2031f05043 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -40,6 +40,7 @@ using namespace llvm;
#define DEBUG_TYPE "reloc-info"
namespace {
+
// For patching purposes, we need to remember where each section starts, both
// for patching up the section size field, and for patching up references to
// locations within the section.
@@ -50,6 +51,82 @@ struct SectionBookkeeping {
uint64_t ContentsOffset;
};
+// The signature of a wasm function, in a struct capable of being used as a
+// DenseMap key.
+struct WasmFunctionType {
+ // Support empty and tombstone instances, needed by DenseMap.
+ enum { Plain, Empty, Tombstone } State;
+
+ // The return types of the function.
+ SmallVector<wasm::ValType, 1> Returns;
+
+ // The parameter types of the function.
+ SmallVector<wasm::ValType, 4> Params;
+
+ WasmFunctionType() : State(Plain) {}
+
+ bool operator==(const WasmFunctionType &Other) const {
+ return State == Other.State && Returns == Other.Returns &&
+ Params == Other.Params;
+ }
+};
+
+// Traits for using WasmFunctionType in a DenseMap.
+struct WasmFunctionTypeDenseMapInfo {
+ static WasmFunctionType getEmptyKey() {
+ WasmFunctionType FuncTy;
+ FuncTy.State = WasmFunctionType::Empty;
+ return FuncTy;
+ }
+ static WasmFunctionType getTombstoneKey() {
+ WasmFunctionType FuncTy;
+ FuncTy.State = WasmFunctionType::Tombstone;
+ return FuncTy;
+ }
+ static unsigned getHashValue(const WasmFunctionType &FuncTy) {
+ uintptr_t Value = FuncTy.State;
+ for (wasm::ValType Ret : FuncTy.Returns)
+ Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
+ for (wasm::ValType Param : FuncTy.Params)
+ Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
+ return Value;
+ }
+ static bool isEqual(const WasmFunctionType &LHS,
+ const WasmFunctionType &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// A wasm import to be written into the import section.
+struct WasmImport {
+ StringRef ModuleName;
+ StringRef FieldName;
+ unsigned Kind;
+ int32_t Type;
+};
+
+// A wasm function to be written into the function section.
+struct WasmFunction {
+ int32_t Type;
+ const MCSymbolWasm *Sym;
+};
+
+// A wasm export to be written into the export section.
+struct WasmExport {
+ StringRef FieldName;
+ unsigned Kind;
+ uint32_t Index;
+};
+
+// A wasm global to be written into the global section.
+struct WasmGlobal {
+ wasm::ValType Type;
+ bool IsMutable;
+ bool HasImport;
+ uint64_t InitialValue;
+ uint32_t ImportIndex;
+};
+
class WasmObjectWriter : public MCObjectWriter {
/// Helper struct for containing some precomputed information on symbols.
struct WasmSymbolData {
@@ -91,18 +168,10 @@ public:
: MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {}
private:
- void reset() override {
- MCObjectWriter::reset();
- }
-
~WasmObjectWriter() override;
void writeHeader(const MCAssembler &Asm);
- void writeValueType(wasm::ValType Ty) {
- encodeSLEB128(int32_t(Ty), getStream());
- }
-
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
@@ -112,7 +181,37 @@ private:
const MCAsmLayout &Layout) override;
void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+
+ void writeValueType(wasm::ValType Ty) {
+ encodeSLEB128(int32_t(Ty), getStream());
+ }
+
+ void writeTypeSection(const SmallVector<WasmFunctionType, 4> &FunctionTypes);
+ void writeImportSection(const SmallVector<WasmImport, 4> &Imports);
+ void writeFunctionSection(const SmallVector<WasmFunction, 4> &Functions);
+ void writeTableSection(const SmallVector<uint32_t, 4> &TableElems);
+ void writeMemorySection(const SmallVector<char, 0> &DataBytes);
+ void writeGlobalSection(const SmallVector<WasmGlobal, 4> &Globals);
+ void writeExportSection(const SmallVector<WasmExport, 4> &Exports);
+ void writeElemSection(const SmallVector<uint32_t, 4> &TableElems);
+ void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ const SmallVector<WasmFunction, 4> &Functions);
+ uint64_t
+ writeDataSection(const SmallVector<char, 0> &DataBytes,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices);
+ void writeNameSection(const SmallVector<WasmFunction, 4> &Functions,
+ const SmallVector<WasmImport, 4> &Imports,
+ uint32_t NumFuncImports);
+ void writeCodeRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices);
+ void writeDataRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ uint64_t DataSectionHeaderSize);
+ void writeLinkingMetaDataSection(bool HasStackPointer,
+ uint32_t StackPointerGlobal);
};
+
} // end anonymous namespace
WasmObjectWriter::~WasmObjectWriter() {}
@@ -278,86 +377,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
DataRelocations.push_back(Rec);
}
-namespace {
-
-// The signature of a wasm function, in a struct capable of being used as a
-// DenseMap key.
-struct WasmFunctionType {
- // Support empty and tombstone instances, needed by DenseMap.
- enum { Plain, Empty, Tombstone } State;
-
- // The return types of the function.
- SmallVector<wasm::ValType, 1> Returns;
-
- // The parameter types of the function.
- SmallVector<wasm::ValType, 4> Params;
-
- WasmFunctionType() : State(Plain) {}
-
- bool operator==(const WasmFunctionType &Other) const {
- return State == Other.State && Returns == Other.Returns &&
- Params == Other.Params;
- }
-};
-
-// Traits for using WasmFunctionType in a DenseMap.
-struct WasmFunctionTypeDenseMapInfo {
- static WasmFunctionType getEmptyKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Empty;
- return FuncTy;
- }
- static WasmFunctionType getTombstoneKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Tombstone;
- return FuncTy;
- }
- static unsigned getHashValue(const WasmFunctionType &FuncTy) {
- uintptr_t Value = FuncTy.State;
- for (wasm::ValType Ret : FuncTy.Returns)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
- for (wasm::ValType Param : FuncTy.Params)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
- return Value;
- }
- static bool isEqual(const WasmFunctionType &LHS,
- const WasmFunctionType &RHS) {
- return LHS == RHS;
- }
-};
-
-// A wasm import to be written into the import section.
-struct WasmImport {
- StringRef ModuleName;
- StringRef FieldName;
- unsigned Kind;
- int32_t Type;
-};
-
-// A wasm function to be written into the function section.
-struct WasmFunction {
- int32_t Type;
- const MCSymbolWasm *Sym;
-};
-
-// A wasm export to be written into the export section.
-struct WasmExport {
- StringRef FieldName;
- unsigned Kind;
- uint32_t Index;
-};
-
-// A wasm global to be written into the global section.
-struct WasmGlobal {
- wasm::ValType Type;
- bool IsMutable;
- bool HasImport;
- uint64_t InitialValue;
- uint32_t ImportIndex;
-};
-
-} // end anonymous namespace
-
// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
// to allow patching.
static void
@@ -529,6 +548,367 @@ static void WriteTypeRelocations(
}
}
+void WasmObjectWriter::writeTypeSection(
+ const SmallVector<WasmFunctionType, 4> &FunctionTypes) {
+ if (FunctionTypes.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_TYPE);
+
+ encodeULEB128(FunctionTypes.size(), getStream());
+
+ for (const WasmFunctionType &FuncTy : FunctionTypes) {
+ encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream());
+ encodeULEB128(FuncTy.Params.size(), getStream());
+ for (wasm::ValType Ty : FuncTy.Params)
+ writeValueType(Ty);
+ encodeULEB128(FuncTy.Returns.size(), getStream());
+ for (wasm::ValType Ty : FuncTy.Returns)
+ writeValueType(Ty);
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeImportSection(
+ const SmallVector<WasmImport, 4> &Imports) {
+ if (Imports.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_IMPORT);
+
+ encodeULEB128(Imports.size(), getStream());
+ for (const WasmImport &Import : Imports) {
+ StringRef ModuleName = Import.ModuleName;
+ encodeULEB128(ModuleName.size(), getStream());
+ writeBytes(ModuleName);
+
+ StringRef FieldName = Import.FieldName;
+ encodeULEB128(FieldName.size(), getStream());
+ writeBytes(FieldName);
+
+ encodeULEB128(Import.Kind, getStream());
+
+ switch (Import.Kind) {
+ case wasm::WASM_EXTERNAL_FUNCTION:
+ encodeULEB128(Import.Type, getStream());
+ break;
+ case wasm::WASM_EXTERNAL_GLOBAL:
+ encodeSLEB128(int32_t(Import.Type), getStream());
+ encodeULEB128(0, getStream()); // mutability
+ break;
+ default:
+ llvm_unreachable("unsupported import kind");
+ }
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeFunctionSection(
+ const SmallVector<WasmFunction, 4> &Functions) {
+ if (Functions.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_FUNCTION);
+
+ encodeULEB128(Functions.size(), getStream());
+ for (const WasmFunction &Func : Functions)
+ encodeULEB128(Func.Type, getStream());
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeTableSection(
+ const SmallVector<uint32_t, 4> &TableElems) {
+ // For now, always emit the table section, since indirect calls are not
+ // valid without it. In the future, we could perhaps be more clever and omit
+ // it if there are no indirect calls.
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_TABLE);
+
+ // The number of tables, fixed to 1 for now.
+ encodeULEB128(1, getStream());
+
+ encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream());
+
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(TableElems.size(), getStream()); // initial
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeMemorySection(
+ const SmallVector<char, 0> &DataBytes) {
+ // For now, always emit the memory section, since loads and stores are not
+ // valid without it. In the future, we could perhaps be more clever and omit
+ // it if there are no loads or stores.
+ SectionBookkeeping Section;
+ uint32_t NumPages =
+ (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
+
+ startSection(Section, wasm::WASM_SEC_MEMORY);
+ encodeULEB128(1, getStream()); // number of memory spaces
+
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(NumPages, getStream()); // initial
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeGlobalSection(
+ const SmallVector<WasmGlobal, 4> &Globals) {
+ if (Globals.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_GLOBAL);
+
+ encodeULEB128(Globals.size(), getStream());
+ for (const WasmGlobal &Global : Globals) {
+ writeValueType(Global.Type);
+ write8(Global.IsMutable);
+
+ if (Global.HasImport) {
+ assert(Global.InitialValue == 0);
+ write8(wasm::WASM_OPCODE_GET_GLOBAL);
+ encodeULEB128(Global.ImportIndex, getStream());
+ } else {
+ assert(Global.ImportIndex == 0);
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(Global.InitialValue, getStream()); // offset
+ }
+ write8(wasm::WASM_OPCODE_END);
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeExportSection(
+ const SmallVector<WasmExport, 4> &Exports) {
+ if (Exports.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_EXPORT);
+
+ encodeULEB128(Exports.size(), getStream());
+ for (const WasmExport &Export : Exports) {
+ encodeULEB128(Export.FieldName.size(), getStream());
+ writeBytes(Export.FieldName);
+
+ encodeSLEB128(Export.Kind, getStream());
+
+ encodeULEB128(Export.Index, getStream());
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeElemSection(
+ const SmallVector<uint32_t, 4> &TableElems) {
+ if (TableElems.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_ELEM);
+
+ encodeULEB128(1, getStream()); // number of "segments"
+ encodeULEB128(0, getStream()); // the table index
+
+ // init expr for starting offset
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(0, getStream());
+ write8(wasm::WASM_OPCODE_END);
+
+ encodeULEB128(TableElems.size(), getStream());
+ for (uint32_t Elem : TableElems)
+ encodeULEB128(Elem, getStream());
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeCodeSection(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ const SmallVector<WasmFunction, 4> &Functions) {
+ if (Functions.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CODE);
+
+ encodeULEB128(Functions.size(), getStream());
+
+ for (const WasmFunction &Func : Functions) {
+ MCSectionWasm &FuncSection =
+ static_cast<MCSectionWasm &>(Func.Sym->getSection());
+
+ if (Func.Sym->isVariable())
+ report_fatal_error("weak symbols not supported yet");
+
+ if (Func.Sym->getOffset() != 0)
+ report_fatal_error("function sections must contain one function each");
+
+ if (!Func.Sym->getSize())
+ report_fatal_error("function symbols must have a size set with .size");
+
+ int64_t Size = 0;
+ if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
+ encodeULEB128(Size, getStream());
+
+ FuncSection.setSectionOffset(getStream().tell() -
+ Section.ContentsOffset);
+
+ Asm.writeSectionData(&FuncSection, Layout);
+ }
+
+ // Apply the type index fixups for call_indirect etc. instructions.
+ for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
+ uint32_t Type = TypeIndexFixupTypes[i];
+ unsigned Padding = PaddingFor5ByteULEB128(Type);
+
+ const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
+ assert(Fixup.Addend == 0);
+ assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
+ uint64_t Offset = Fixup.Offset +
+ Fixup.FixupSection->getSectionOffset();
+
+ uint8_t Buffer[16];
+ unsigned SizeLen = encodeULEB128(Type, Buffer, Padding);
+ assert(SizeLen == 5);
+ getStream().pwrite((char *)Buffer, SizeLen,
+ Section.ContentsOffset + Offset);
+ }
+
+ // Apply fixups.
+ ApplyRelocations(CodeRelocations, getStream(), SymbolIndices,
+ Section.ContentsOffset);
+
+ endSection(Section);
+}
+
+uint64_t WasmObjectWriter::writeDataSection(
+ const SmallVector<char, 0> &DataBytes,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) {
+ if (DataBytes.empty())
+ return 0;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_DATA);
+
+ encodeULEB128(1, getStream()); // count
+ encodeULEB128(0, getStream()); // memory index
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(0, getStream()); // offset
+ write8(wasm::WASM_OPCODE_END);
+ encodeULEB128(DataBytes.size(), getStream()); // size
+ uint32_t HeaderSize = getStream().tell() - Section.ContentsOffset;
+ writeBytes(DataBytes); // data
+
+ // Apply fixups.
+ ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
+ Section.ContentsOffset + HeaderSize);
+
+ endSection(Section);
+ return HeaderSize;
+}
+
+void WasmObjectWriter::writeNameSection(
+ const SmallVector<WasmFunction, 4> &Functions,
+ const SmallVector<WasmImport, 4> &Imports,
+ unsigned NumFuncImports) {
+ uint32_t TotalFunctions = NumFuncImports + Functions.size();
+ if (TotalFunctions == 0)
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "name");
+ SectionBookkeeping SubSection;
+ startSection(SubSection, wasm::WASM_NAMES_FUNCTION);
+
+ encodeULEB128(TotalFunctions, getStream());
+ uint32_t Index = 0;
+ for (const WasmImport &Import : Imports) {
+ if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
+ encodeULEB128(Index, getStream());
+ encodeULEB128(Import.FieldName.size(), getStream());
+ writeBytes(Import.FieldName);
+ ++Index;
+ }
+ }
+ for (const WasmFunction &Func : Functions) {
+ encodeULEB128(Index, getStream());
+ encodeULEB128(Func.Sym->getName().size(), getStream());
+ writeBytes(Func.Sym->getName());
+ ++Index;
+ }
+
+ endSection(SubSection);
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeCodeRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) {
+ // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // for descriptions of the reloc sections.
+
+ if (CodeRelocations.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
+
+ encodeULEB128(wasm::WASM_SEC_CODE, getStream());
+ encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
+
+ WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0);
+ WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeDataRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ uint64_t DataSectionHeaderSize) {
+ // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // for descriptions of the reloc sections.
+
+ if (DataRelocations.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA");
+
+ encodeULEB128(wasm::WASM_SEC_DATA, getStream());
+ encodeULEB128(DataRelocations.size(), getStream());
+
+ WriteRelocations(DataRelocations, getStream(), SymbolIndices,
+ DataSectionHeaderSize);
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeLinkingMetaDataSection(
+ bool HasStackPointer, uint32_t StackPointerGlobal) {
+ if (!HasStackPointer)
+ return;
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
+
+ encodeULEB128(1, getStream()); // count
+
+ encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type
+ encodeULEB128(StackPointerGlobal, getStream()); // id
+
+ endSection(Section);
+}
+
void WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
MCContext &Ctx = Asm.getContext();
@@ -730,16 +1110,21 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (IsAddressTaken.count(&WS))
TableElems.push_back(Index);
} else {
- if (WS.getOffset() != 0)
- report_fatal_error("data sections must contain one variable each");
- if (!WS.getSize())
- report_fatal_error("data symbols must have a size set with .size");
-
- int64_t Size = 0;
- if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
+ if (WS.isTemporary() && !WS.getSize())
+ continue;
if (WS.isDefined(false)) {
+ if (WS.getOffset() != 0)
+ report_fatal_error("data sections must contain one variable each: " +
+ WS.getName());
+ if (!WS.getSize())
+ report_fatal_error("data symbols must have a size set with .size: " +
+ WS.getName());
+
+ int64_t Size = 0;
+ if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
MCSectionWasm &DataSection =
static_cast<MCSectionWasm &>(WS.getSection());
@@ -827,322 +1212,23 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
// Write out the Wasm header.
writeHeader(Asm);
- SectionBookkeeping Section;
-
- // === Type Section =========================================================
- if (!FunctionTypes.empty()) {
- startSection(Section, wasm::WASM_SEC_TYPE);
-
- encodeULEB128(FunctionTypes.size(), getStream());
-
- for (WasmFunctionType &FuncTy : FunctionTypes) {
- encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream());
- encodeULEB128(FuncTy.Params.size(), getStream());
- for (wasm::ValType Ty : FuncTy.Params)
- writeValueType(Ty);
- encodeULEB128(FuncTy.Returns.size(), getStream());
- for (wasm::ValType Ty : FuncTy.Returns)
- writeValueType(Ty);
- }
-
- endSection(Section);
- }
-
- // === Import Section ========================================================
- if (!Imports.empty()) {
- startSection(Section, wasm::WASM_SEC_IMPORT);
-
- encodeULEB128(Imports.size(), getStream());
- for (const WasmImport &Import : Imports) {
- StringRef ModuleName = Import.ModuleName;
- encodeULEB128(ModuleName.size(), getStream());
- writeBytes(ModuleName);
-
- StringRef FieldName = Import.FieldName;
- encodeULEB128(FieldName.size(), getStream());
- writeBytes(FieldName);
-
- encodeULEB128(Import.Kind, getStream());
-
- switch (Import.Kind) {
- case wasm::WASM_EXTERNAL_FUNCTION:
- encodeULEB128(Import.Type, getStream());
- break;
- case wasm::WASM_EXTERNAL_GLOBAL:
- encodeSLEB128(int32_t(Import.Type), getStream());
- encodeULEB128(0, getStream()); // mutability
- break;
- default:
- llvm_unreachable("unsupported import kind");
- }
- }
-
- endSection(Section);
- }
-
- // === Function Section ======================================================
- if (!Functions.empty()) {
- startSection(Section, wasm::WASM_SEC_FUNCTION);
-
- encodeULEB128(Functions.size(), getStream());
- for (const WasmFunction &Func : Functions)
- encodeULEB128(Func.Type, getStream());
-
- endSection(Section);
- }
-
- // === Table Section =========================================================
- // For now, always emit the table section, since indirect calls are not
- // valid without it. In the future, we could perhaps be more clever and omit
- // it if there are no indirect calls.
- startSection(Section, wasm::WASM_SEC_TABLE);
-
- // The number of tables, fixed to 1 for now.
- encodeULEB128(1, getStream());
-
- encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream());
-
- encodeULEB128(0, getStream()); // flags
- encodeULEB128(TableElems.size(), getStream()); // initial
-
- endSection(Section);
-
- // === Memory Section ========================================================
- // For now, always emit the memory section, since loads and stores are not
- // valid without it. In the future, we could perhaps be more clever and omit
- // it if there are no loads or stores.
- uint32_t NumPages =
- (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
-
- startSection(Section, wasm::WASM_SEC_MEMORY);
- encodeULEB128(1, getStream()); // number of memory spaces
-
- encodeULEB128(0, getStream()); // flags
- encodeULEB128(NumPages, getStream()); // initial
-
- endSection(Section);
-
- // === Global Section ========================================================
- if (!Globals.empty()) {
- startSection(Section, wasm::WASM_SEC_GLOBAL);
-
- encodeULEB128(Globals.size(), getStream());
- for (const WasmGlobal &Global : Globals) {
- writeValueType(Global.Type);
- write8(Global.IsMutable);
-
- if (Global.HasImport) {
- assert(Global.InitialValue == 0);
- write8(wasm::WASM_OPCODE_GET_GLOBAL);
- encodeULEB128(Global.ImportIndex, getStream());
- } else {
- assert(Global.ImportIndex == 0);
- write8(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(Global.InitialValue, getStream()); // offset
- }
- write8(wasm::WASM_OPCODE_END);
- }
-
- endSection(Section);
- }
-
- // === Export Section ========================================================
- if (!Exports.empty()) {
- startSection(Section, wasm::WASM_SEC_EXPORT);
-
- encodeULEB128(Exports.size(), getStream());
- for (const WasmExport &Export : Exports) {
- encodeULEB128(Export.FieldName.size(), getStream());
- writeBytes(Export.FieldName);
-
- encodeSLEB128(Export.Kind, getStream());
-
- encodeULEB128(Export.Index, getStream());
- }
-
- endSection(Section);
- }
-
-#if 0 // TODO: Start Section
- if (HaveStartFunction) {
- // === Start Section =========================================================
- startSection(Section, wasm::WASM_SEC_START);
-
- encodeSLEB128(StartFunction, getStream());
-
- endSection(Section);
- }
-#endif
-
- // === Elem Section ==========================================================
- if (!TableElems.empty()) {
- startSection(Section, wasm::WASM_SEC_ELEM);
-
- encodeULEB128(1, getStream()); // number of "segments"
- encodeULEB128(0, getStream()); // the table index
-
- // init expr for starting offset
- write8(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(0, getStream());
- write8(wasm::WASM_OPCODE_END);
-
- encodeULEB128(TableElems.size(), getStream());
- for (uint32_t Elem : TableElems)
- encodeULEB128(Elem, getStream());
-
- endSection(Section);
- }
-
- // === Code Section ==========================================================
- if (!Functions.empty()) {
- startSection(Section, wasm::WASM_SEC_CODE);
-
- encodeULEB128(Functions.size(), getStream());
-
- for (const WasmFunction &Func : Functions) {
- MCSectionWasm &FuncSection =
- static_cast<MCSectionWasm &>(Func.Sym->getSection());
-
- if (Func.Sym->isVariable())
- report_fatal_error("weak symbols not supported yet");
-
- if (Func.Sym->getOffset() != 0)
- report_fatal_error("function sections must contain one function each");
-
- if (!Func.Sym->getSize())
- report_fatal_error("function symbols must have a size set with .size");
-
- int64_t Size = 0;
- if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
-
- encodeULEB128(Size, getStream());
-
- FuncSection.setSectionOffset(getStream().tell() -
- Section.ContentsOffset);
-
- Asm.writeSectionData(&FuncSection, Layout);
- }
-
- // Apply the type index fixups for call_indirect etc. instructions.
- for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
- uint32_t Type = TypeIndexFixupTypes[i];
- unsigned Padding = PaddingFor5ByteULEB128(Type);
-
- const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
- assert(Fixup.Addend == 0);
- assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
- uint64_t Offset = Fixup.Offset +
- Fixup.FixupSection->getSectionOffset();
-
- uint8_t Buffer[16];
- unsigned SizeLen = encodeULEB128(Type, Buffer, Padding);
- assert(SizeLen == 5);
- getStream().pwrite((char *)Buffer, SizeLen,
- Section.ContentsOffset + Offset);
- }
-
- // Apply fixups.
- ApplyRelocations(CodeRelocations, getStream(), SymbolIndices,
- Section.ContentsOffset);
-
- endSection(Section);
- }
-
- // === Data Section ==========================================================
- uint32_t DataSectionHeaderSize = 0;
- if (!DataBytes.empty()) {
- startSection(Section, wasm::WASM_SEC_DATA);
-
- encodeULEB128(1, getStream()); // count
- encodeULEB128(0, getStream()); // memory index
- write8(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(0, getStream()); // offset
- write8(wasm::WASM_OPCODE_END);
- encodeULEB128(DataBytes.size(), getStream()); // size
- DataSectionHeaderSize = getStream().tell() - Section.ContentsOffset;
- writeBytes(DataBytes); // data
-
- // Apply fixups.
- ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
- Section.ContentsOffset + DataSectionHeaderSize);
-
- endSection(Section);
- }
-
- // === Name Section ==========================================================
- uint32_t TotalFunctions = NumFuncImports + Functions.size();
- if (TotalFunctions != 0) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "name");
- SectionBookkeeping SubSection;
- startSection(SubSection, wasm::WASM_NAMES_FUNCTION);
-
- encodeULEB128(TotalFunctions, getStream());
- uint32_t Index = 0;
- for (const WasmImport &Import : Imports) {
- if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
- encodeULEB128(Index, getStream());
- encodeULEB128(Import.FieldName.size(), getStream());
- writeBytes(Import.FieldName);
- ++Index;
- }
- }
- for (const WasmFunction &Func : Functions) {
- encodeULEB128(Index, getStream());
- encodeULEB128(Func.Sym->getName().size(), getStream());
- writeBytes(Func.Sym->getName());
- ++Index;
- }
-
- endSection(SubSection);
- endSection(Section);
- }
-
- // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
- // for descriptions of the reloc sections.
-
- // === Code Reloc Section ====================================================
- if (!CodeRelocations.empty()) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
-
- encodeULEB128(wasm::WASM_SEC_CODE, getStream());
-
- encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
-
- WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0);
- WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
-
- endSection(Section);
- }
-
- // === Data Reloc Section ====================================================
- if (!DataRelocations.empty()) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA");
-
- encodeULEB128(wasm::WASM_SEC_DATA, getStream());
-
- encodeULEB128(DataRelocations.size(), getStream());
-
- WriteRelocations(DataRelocations, getStream(), SymbolIndices,
- DataSectionHeaderSize);
-
- endSection(Section);
- }
-
- // === Linking Metadata Section ==============================================
- if (HasStackPointer) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
-
- encodeULEB128(1, getStream()); // count
-
- encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type
- encodeULEB128(StackPointerGlobal, getStream()); // id
-
- endSection(Section);
- }
+ writeTypeSection(FunctionTypes);
+ writeImportSection(Imports);
+ writeFunctionSection(Functions);
+ writeTableSection(TableElems);
+ writeMemorySection(DataBytes);
+ writeGlobalSection(Globals);
+ writeExportSection(Exports);
+ // TODO: Start Section
+ writeElemSection(TableElems);
+ writeCodeSection(Asm, Layout, SymbolIndices, Functions);
+ uint64_t DataSectionHeaderSize = writeDataSection(DataBytes, SymbolIndices);
+ writeNameSection(Functions, Imports, NumFuncImports);
+ writeCodeRelocSection(SymbolIndices);
+ writeDataRelocSection(SymbolIndices, DataSectionHeaderSize);
+ writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal);
// TODO: Translate the .comment section to the output.
-
// TODO: Translate debug sections to the output.
}
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index f652ff57f30d..21d29835624e 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -17,6 +17,11 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -36,16 +41,80 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false)
+LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind)
LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind)
LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineEntry)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceColumnEntry)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceFileChecksumEntry)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineInfo)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineBlock)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::InlineeInfo)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::InlineeSite)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineBlock)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite)
+
+namespace llvm {
+namespace CodeViewYAML {
+namespace detail {
+struct YAMLSubsectionBase {
+ explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {}
+ DebugSubsectionKind Kind;
+ virtual ~YAMLSubsectionBase() {}
+
+ virtual void map(IO &IO) = 0;
+ virtual std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const = 0;
+};
+}
+}
+}
+
+namespace {
+struct YAMLChecksumsSubsection : public YAMLSubsectionBase {
+ YAMLChecksumsSubsection()
+ : YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {}
+
+ void map(IO &IO) override;
+ std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *Strings,
+ DebugChecksumsSubsection *Checksums) const override;
+ static Expected<std::shared_ptr<YAMLChecksumsSubsection>>
+ fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &FC);
+
+ std::vector<SourceFileChecksumEntry> Checksums;
+};
+
+struct YAMLLinesSubsection : public YAMLSubsectionBase {
+ YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {}
+
+ void map(IO &IO) override;
+ std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *Strings,
+ DebugChecksumsSubsection *Checksums) const override;
+ static Expected<std::shared_ptr<YAMLLinesSubsection>>
+ fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugLinesSubsectionRef &Lines);
+
+ SourceLineInfo Lines;
+};
+
+struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase {
+ YAMLInlineeLinesSubsection()
+ : YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) {}
+
+ void map(IO &IO) override;
+ std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *Strings,
+ DebugChecksumsSubsection *Checksums) const override;
+ static Expected<std::shared_ptr<YAMLInlineeLinesSubsection>>
+ fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugInlineeLinesSubsectionRef &Lines);
+
+ InlineeInfo InlineeLines;
+};
+}
void ScalarBitSetTraits<LineFlags>::bitset(IO &io, LineFlags &Flags) {
io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns);
@@ -99,21 +168,6 @@ void MappingTraits<SourceFileChecksumEntry>::mapping(
IO.mapRequired("Checksum", Obj.ChecksumBytes);
}
-void MappingTraits<SourceLineInfo>::mapping(IO &IO, SourceLineInfo &Obj) {
- IO.mapRequired("CodeSize", Obj.CodeSize);
-
- IO.mapRequired("Flags", Obj.Flags);
- IO.mapRequired("RelocOffset", Obj.RelocOffset);
- IO.mapRequired("RelocSegment", Obj.RelocSegment);
- IO.mapRequired("Blocks", Obj.Blocks);
-}
-
-void MappingTraits<SourceFileInfo>::mapping(IO &IO, SourceFileInfo &Obj) {
- IO.mapOptional("Checksums", Obj.FileChecksums);
- IO.mapOptional("Lines", Obj.LineFragments);
- IO.mapOptional("InlineeLines", Obj.Inlinees);
-}
-
void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) {
IO.mapRequired("FileName", Obj.FileName);
IO.mapRequired("LineNum", Obj.SourceLineNum);
@@ -121,7 +175,310 @@ void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) {
IO.mapOptional("ExtraFiles", Obj.ExtraFiles);
}
-void MappingTraits<InlineeInfo>::mapping(IO &IO, InlineeInfo &Obj) {
- IO.mapRequired("HasExtraFiles", Obj.HasExtraFiles);
- IO.mapRequired("Sites", Obj.Sites);
+void YAMLChecksumsSubsection::map(IO &IO) {
+ IO.mapTag("!FileChecksums", true);
+ IO.mapRequired("Checksums", Checksums);
+}
+
+void YAMLLinesSubsection::map(IO &IO) {
+ IO.mapTag("!Lines", true);
+ IO.mapRequired("CodeSize", Lines.CodeSize);
+
+ IO.mapRequired("Flags", Lines.Flags);
+ IO.mapRequired("RelocOffset", Lines.RelocOffset);
+ IO.mapRequired("RelocSegment", Lines.RelocSegment);
+ IO.mapRequired("Blocks", Lines.Blocks);
+}
+
+void YAMLInlineeLinesSubsection::map(IO &IO) {
+ IO.mapTag("!InlineeLines", true);
+ IO.mapRequired("HasExtraFiles", InlineeLines.HasExtraFiles);
+ IO.mapRequired("Sites", InlineeLines.Sites);
+}
+
+void MappingTraits<YAMLDebugSubsection>::mapping(
+ IO &IO, YAMLDebugSubsection &Subsection) {
+ if (!IO.outputting()) {
+ if (IO.mapTag("!FileChecksums")) {
+ auto SS = std::make_shared<YAMLChecksumsSubsection>();
+ Subsection.Subsection = SS;
+ } else if (IO.mapTag("!Lines")) {
+ Subsection.Subsection = std::make_shared<YAMLLinesSubsection>();
+ } else if (IO.mapTag("!InlineeLines")) {
+ Subsection.Subsection = std::make_shared<YAMLInlineeLinesSubsection>();
+ } else {
+ llvm_unreachable("Unexpected subsection tag!");
+ }
+ }
+ Subsection.Subsection->map(IO);
+}
+
+static Expected<const YAMLChecksumsSubsection &>
+findChecksums(ArrayRef<YAMLDebugSubsection> Subsections) {
+ for (const auto &SS : Subsections) {
+ if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) {
+ return static_cast<const YAMLChecksumsSubsection &>(*SS.Subsection);
+ }
+ }
+ return make_error<CodeViewError>(cv_error_code::no_records);
+}
+
+std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection(
+ DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const {
+ assert(UseStrings && !UseChecksums);
+ auto Result = llvm::make_unique<DebugChecksumsSubsection>(*UseStrings);
+ for (const auto &CS : Checksums) {
+ Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes);
+ }
+ return std::move(Result);
+}
+
+std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
+ DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const {
+ assert(UseStrings && UseChecksums);
+ auto Result =
+ llvm::make_unique<DebugLinesSubsection>(*UseChecksums, *UseStrings);
+ Result->setCodeSize(Lines.CodeSize);
+ Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset);
+ Result->setFlags(Lines.Flags);
+ for (const auto &LC : Lines.Blocks) {
+ Result->createBlock(LC.FileName);
+ if (Result->hasColumnInfo()) {
+ for (const auto &Item : zip(LC.Lines, LC.Columns)) {
+ auto &L = std::get<0>(Item);
+ auto &C = std::get<1>(Item);
+ uint32_t LE = L.LineStart + L.EndDelta;
+ Result->addLineAndColumnInfo(L.Offset,
+ LineInfo(L.LineStart, LE, L.IsStatement),
+ C.StartColumn, C.EndColumn);
+ }
+ } else {
+ for (const auto &L : LC.Lines) {
+ uint32_t LE = L.LineStart + L.EndDelta;
+ Result->addLineInfo(L.Offset, LineInfo(L.LineStart, LE, L.IsStatement));
+ }
+ }
+ }
+ return llvm::cast<DebugSubsection>(std::move(Result));
+}
+
+std::unique_ptr<DebugSubsection>
+YAMLInlineeLinesSubsection::toCodeViewSubsection(
+ DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const {
+ assert(UseChecksums);
+ auto Result = llvm::make_unique<DebugInlineeLinesSubsection>(
+ *UseChecksums, InlineeLines.HasExtraFiles);
+
+ for (const auto &Site : InlineeLines.Sites) {
+ Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName,
+ Site.SourceLineNum);
+ if (!InlineeLines.HasExtraFiles)
+ continue;
+
+ for (auto EF : Site.ExtraFiles) {
+ Result->addExtraFile(EF);
+ }
+ }
+ return llvm::cast<DebugSubsection>(std::move(Result));
+}
+
+static Expected<SourceFileChecksumEntry>
+convertOneChecksum(const DebugStringTableSubsectionRef &Strings,
+ const FileChecksumEntry &CS) {
+ auto ExpectedString = Strings.getString(CS.FileNameOffset);
+ if (!ExpectedString)
+ return ExpectedString.takeError();
+
+ SourceFileChecksumEntry Result;
+ Result.ChecksumBytes.Bytes = CS.Checksum;
+ Result.Kind = CS.Kind;
+ Result.FileName = *ExpectedString;
+ return Result;
+}
+
+static Expected<StringRef>
+getFileName(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) {
+ auto Iter = Checksums.getArray().at(FileID);
+ if (Iter == Checksums.getArray().end())
+ return make_error<CodeViewError>(cv_error_code::no_records);
+ uint32_t Offset = Iter->FileNameOffset;
+ return Strings.getString(Offset);
+}
+
+Expected<std::shared_ptr<YAMLChecksumsSubsection>>
+YAMLChecksumsSubsection::fromCodeViewSubsection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &FC) {
+ auto Result = std::make_shared<YAMLChecksumsSubsection>();
+
+ for (const auto &CS : FC) {
+ auto ConvertedCS = convertOneChecksum(Strings, CS);
+ if (!ConvertedCS)
+ return ConvertedCS.takeError();
+ Result->Checksums.push_back(*ConvertedCS);
+ }
+ return Result;
+}
+
+Expected<std::shared_ptr<YAMLLinesSubsection>>
+YAMLLinesSubsection::fromCodeViewSubsection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugLinesSubsectionRef &Lines) {
+ auto Result = std::make_shared<YAMLLinesSubsection>();
+ Result->Lines.CodeSize = Lines.header()->CodeSize;
+ Result->Lines.RelocOffset = Lines.header()->RelocOffset;
+ Result->Lines.RelocSegment = Lines.header()->RelocSegment;
+ Result->Lines.Flags = static_cast<LineFlags>(uint16_t(Lines.header()->Flags));
+ for (const auto &L : Lines) {
+ SourceLineBlock Block;
+ auto EF = getFileName(Strings, Checksums, L.NameIndex);
+ if (!EF)
+ return EF.takeError();
+ Block.FileName = *EF;
+ if (Lines.hasColumnInfo()) {
+ for (const auto &C : L.Columns) {
+ SourceColumnEntry SCE;
+ SCE.EndColumn = C.EndColumn;
+ SCE.StartColumn = C.StartColumn;
+ Block.Columns.push_back(SCE);
+ }
+ }
+ for (const auto &LN : L.LineNumbers) {
+ SourceLineEntry SLE;
+ LineInfo LI(LN.Flags);
+ SLE.Offset = LN.Offset;
+ SLE.LineStart = LI.getStartLine();
+ SLE.EndDelta = LI.getLineDelta();
+ SLE.IsStatement = LI.isStatement();
+ Block.Lines.push_back(SLE);
+ }
+ Result->Lines.Blocks.push_back(Block);
+ }
+ return Result;
+}
+
+Expected<std::shared_ptr<YAMLInlineeLinesSubsection>>
+YAMLInlineeLinesSubsection::fromCodeViewSubsection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugInlineeLinesSubsectionRef &Lines) {
+ auto Result = std::make_shared<YAMLInlineeLinesSubsection>();
+
+ Result->InlineeLines.HasExtraFiles = Lines.hasExtraFiles();
+ for (const auto &IL : Lines) {
+ InlineeSite Site;
+ auto ExpF = getFileName(Strings, Checksums, IL.Header->FileID);
+ if (!ExpF)
+ return ExpF.takeError();
+ Site.FileName = *ExpF;
+ Site.Inlinee = IL.Header->Inlinee.getIndex();
+ Site.SourceLineNum = IL.Header->SourceLineNum;
+ if (Lines.hasExtraFiles()) {
+ for (const auto EF : IL.ExtraFiles) {
+ auto ExpF2 = getFileName(Strings, Checksums, EF);
+ if (!ExpF2)
+ return ExpF2.takeError();
+ Site.ExtraFiles.push_back(*ExpF2);
+ }
+ }
+ Result->InlineeLines.Sites.push_back(Site);
+ }
+ return Result;
+}
+
+Expected<std::vector<std::unique_ptr<DebugSubsection>>>
+llvm::CodeViewYAML::convertSubsectionList(
+ ArrayRef<YAMLDebugSubsection> Subsections,
+ DebugStringTableSubsection &Strings) {
+ std::vector<std::unique_ptr<DebugSubsection>> Result;
+ if (Subsections.empty())
+ return std::move(Result);
+
+ auto Checksums = findChecksums(Subsections);
+ if (!Checksums)
+ return Checksums.takeError();
+ auto ChecksumsBase = Checksums->toCodeViewSubsection(&Strings, nullptr);
+ DebugChecksumsSubsection &CS =
+ llvm::cast<DebugChecksumsSubsection>(*ChecksumsBase);
+ for (const auto &SS : Subsections) {
+ // We've already converted the checksums subsection, don't do it
+ // twice.
+ std::unique_ptr<DebugSubsection> CVS;
+ if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums)
+ CVS = std::move(ChecksumsBase);
+ else
+ CVS = SS.Subsection->toCodeViewSubsection(&Strings, &CS);
+ Result.push_back(std::move(CVS));
+ }
+ return std::move(Result);
+}
+
+namespace {
+struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
+ explicit SubsectionConversionVisitor(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums)
+ : Strings(Strings), Checksums(Checksums) {}
+
+ Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override;
+ Error visitLines(DebugLinesSubsectionRef &Lines) override;
+ Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) override;
+ Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) override;
+
+ YAMLDebugSubsection Subsection;
+
+private:
+ const DebugStringTableSubsectionRef &Strings;
+ const DebugChecksumsSubsectionRef &Checksums;
+};
+
+Error SubsectionConversionVisitor::visitUnknown(
+ DebugUnknownSubsectionRef &Unknown) {
+ return make_error<CodeViewError>(cv_error_code::operation_unsupported);
+}
+
+Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) {
+ auto Result =
+ YAMLLinesSubsection::fromCodeViewSubsection(Strings, Checksums, Lines);
+ if (!Result)
+ return Result.takeError();
+ Subsection.Subsection = *Result;
+ return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitFileChecksums(
+ DebugChecksumsSubsectionRef &Checksums) {
+ auto Result =
+ YAMLChecksumsSubsection::fromCodeViewSubsection(Strings, Checksums);
+ if (!Result)
+ return Result.takeError();
+ Subsection.Subsection = *Result;
+ return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitInlineeLines(
+ DebugInlineeLinesSubsectionRef &Inlinees) {
+ auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection(
+ Strings, Checksums, Inlinees);
+ if (!Result)
+ return Result.takeError();
+ Subsection.Subsection = *Result;
+ return Error::success();
+}
+}
+
+Expected<YAMLDebugSubsection> YAMLDebugSubsection::fromCodeViewSubection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugSubsectionRecord &SS) {
+ SubsectionConversionVisitor V(Strings, Checksums);
+ if (auto EC = visitDebugSubsection(SS, V))
+ return std::move(EC);
+
+ return V.Subsection;
}
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 6e8bb5c7372c..bd97af3a9323 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -148,7 +148,8 @@ struct SymbolRecordBase {
virtual ~SymbolRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual codeview::CVSymbol
- toCodeViewSymbol(BumpPtrAllocator &Allocator) const = 0;
+ toCodeViewSymbol(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container) const = 0;
virtual Error fromCodeViewSymbol(codeview::CVSymbol Type) = 0;
};
@@ -159,8 +160,9 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase {
void map(yaml::IO &io) override;
codeview::CVSymbol
- toCodeViewSymbol(BumpPtrAllocator &Allocator) const override {
- return SymbolSerializer::writeOneSymbol(Symbol, Allocator);
+ toCodeViewSymbol(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container) const override {
+ return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container);
}
Error fromCodeViewSymbol(codeview::CVSymbol CVS) override {
return SymbolDeserializer::deserializeAs<T>(CVS, Symbol);
@@ -429,8 +431,8 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
}
CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol(
- BumpPtrAllocator &Allocator) const {
- return Symbol->toCodeViewSymbol(Allocator);
+ BumpPtrAllocator &Allocator, CodeViewContainer Container) const {
+ return Symbol->toCodeViewSymbol(Allocator, Container);
}
namespace llvm {
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index eb81e58b9b0e..17c60348633c 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -310,6 +310,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass());
+ // Hoisting of scalars and load expressions.
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
// Speculative execution if the target has divergent branches; otherwise nop.
FPM.addPass(SpeculativeExecutionPass());
@@ -473,8 +477,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(LowerExpectIntrinsicPass());
- if (EnableGVNHoist)
- EarlyFPM.addPass(GVNHoistPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
// Interprocedural constant propagation now that basic cleanup has occured
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 318e21da999d..f7b7ad89e959 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -649,12 +649,10 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::tce:
case Triple::tcele:
case Triple::thumbeb:
- case Triple::xcore:
- return Triple::ELF;
-
case Triple::wasm32:
case Triple::wasm64:
- return Triple::Wasm;
+ case Triple::xcore:
+ return Triple::ELF;
case Triple::ppc:
case Triple::ppc64:
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index 4f656f94ea12..b99c1d1d6b3e 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -1,4 +1,4 @@
-//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===//
+//==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,8 @@
namespace llvm {
+class TargetRegisterInfo;
+
/// Add the accumulator chaining constraint to a PBQP graph
class A57ChainingConstraint : public PBQPRAConstraint {
public:
@@ -33,6 +35,7 @@ private:
// Add constraints between existing chains
void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
};
-}
+
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index d098cf7a5a37..7402bcf1346c 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -56,12 +56,14 @@ def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
@@ -76,6 +78,7 @@ def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
@@ -83,6 +86,10 @@ def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
//===----------------------------------------------------------------------===//
// Define 2 micro-op types
+def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
let Latency = 1;
let NumMicroOps = 2;
@@ -476,17 +483,19 @@ def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr
// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
// -----------------------------------------------------------------------------
def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>;
-def FalkorFMOVZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
+def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
+
MI->getOperand(1).getReg() == AArch64::XZR}]>;
def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
def FalkorWr_FMOV : SchedWriteVariant<[
- SchedVar<FalkorFMOVZrReg, [FalkorWr_1none_0cyc]>,
+ SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
def FalkorWr_MOVZ : SchedWriteVariant<[
SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
+
def FalkorWr_ADDSUBsx : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
@@ -500,6 +509,10 @@ def FalkorWr_LDRSro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
+def FalkorWr_ORRi : SchedWriteVariant<[
+ SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1cyc]>]>;
+
def FalkorWr_PRFMro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
@@ -810,7 +823,8 @@ def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
@@ -825,7 +839,7 @@ def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
+def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
@@ -849,7 +863,7 @@ def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
+def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
@@ -1036,13 +1050,13 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFM
// FP Miscellaneous Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>;
-def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(S|D)i$")>;
+def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>;
+def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs FMOVD0, FMOVS0)>;
+def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd
def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
@@ -1107,11 +1121,12 @@ def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
// Move and Shift Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
-def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
-def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd
def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs MOVi32imm, MOVi64imm)>;
+def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
(instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 78ff3bbe3d1a..55d18c3f3646 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -55,6 +55,8 @@ FunctionPass *createAMDGPUMachineCFGStructurizerPass();
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;
+void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
+
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index e7ebb37a9d62..b50e8d1d659e 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -365,6 +365,13 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"Force to generate flat instruction for global"
>;
+def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
+ "auto-waitcnt-before-barrier",
+ "AutoWaitcntBeforeBarrier",
+ "true",
+ "Hardware automatically inserts waitcnt before barrier"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 1d03714874e2..8084d368c80f 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -22,18 +22,22 @@ using namespace llvm;
namespace {
class AMDGPUAlwaysInline : public ModulePass {
- static char ID;
-
bool GlobalOpt;
public:
- AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { }
+ static char ID;
+
+ AMDGPUAlwaysInline(bool GlobalOpt = false) :
+ ModulePass(ID), GlobalOpt(GlobalOpt) { }
bool runOnModule(Module &M) override;
StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
};
} // End anonymous namespace
+INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
+ "AMDGPU Inline All Functions", false, false)
+
char AMDGPUAlwaysInline::ID = 0;
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 57905be18813..267f4807a788 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -28,11 +28,16 @@ using namespace llvm;
AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
using namespace TargetOpcode;
+ const LLT S1= LLT::scalar(1);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT P1 = LLT::pointer(1, 64);
const LLT P2 = LLT::pointer(2, 64);
+ // FIXME: i1 operands to intrinsics should always be legal, but other i1
+ // values may not be legal. We need to figure out how to distinguish
+ // between these two scenarios.
+ setAction({G_CONSTANT, S1}, Legal);
setAction({G_CONSTANT, S32}, Legal);
setAction({G_CONSTANT, S64}, Legal);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6e301b4ad527..8d157e2f98f2 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -91,6 +91,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FPExceptions(false),
DX10Clamp(false),
FlatForGlobal(false),
+ AutoWaitcntBeforeBarrier(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0582ce95693a..ed9cbb994fad 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -110,6 +110,7 @@ protected:
bool FPExceptions;
bool DX10Clamp;
bool FlatForGlobal;
+ bool AutoWaitcntBeforeBarrier;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool HasApertureRegs;
@@ -195,7 +196,8 @@ public:
}
bool isOpenCLEnv() const {
- return TargetTriple.getEnvironment() == Triple::OpenCL;
+ return TargetTriple.getEnvironment() == Triple::OpenCL ||
+ TargetTriple.getEnvironmentName() == "amdgizcl";
}
Generation getGeneration() const {
@@ -363,6 +365,10 @@ public:
return FlatForGlobal;
}
+ bool hasAutoWaitcntBeforeBarrier() const {
+ return AutoWaitcntBeforeBarrier;
+ }
+
bool hasUnalignedBufferAccess() const {
return UnalignedBufferAccess;
}
@@ -727,12 +733,6 @@ public:
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
- /// \returns True if waitcnt instruction is needed before barrier instruction,
- /// false otherwise.
- bool needWaitcntBeforeBarrier() const {
- return true;
- }
-
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 596f02ae4a64..404598ff4738 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -116,7 +116,7 @@ static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
static cl::opt<bool> EnableSIInsertWaitcntsPass(
"enable-si-insert-waitcnts",
cl::desc("Use new waitcnt insertion pass"),
- cl::init(false));
+ cl::init(true));
// Option to run late CFG structurizer
static cl::opt<bool> LateCFGStructurize(
@@ -139,6 +139,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIShrinkInstructionsPass(*PR);
initializeSIFixControlFlowLiveIntervalsPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f13629a3185f..dfac068d1f69 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -35,9 +35,12 @@ struct FoldCandidate {
};
unsigned char UseOpNo;
MachineOperand::MachineOperandType Kind;
+ bool Commuted;
- FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
+ bool Commuted_ = false) :
+ UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+ Commuted(Commuted_) {
if (FoldOp->isImm()) {
ImmToFold = FoldOp->getImm();
} else if (FoldOp->isFI()) {
@@ -59,6 +62,10 @@ struct FoldCandidate {
bool isReg() const {
return Kind == MachineOperand::MO_Register;
}
+
+ bool isCommuted() const {
+ return Commuted;
+ }
};
class SIFoldOperands : public MachineFunctionPass {
@@ -237,8 +244,13 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
return false;
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold))
+ if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
return false;
+ }
+
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+ return true;
}
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
@@ -699,6 +711,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
tryFoldInst(TII, Fold.UseMI);
+ } else if (Fold.isCommuted()) {
+ // Restoring instruction's original operand order if fold has failed.
+ TII->commuteInstruction(*Fold.UseMI, false);
}
}
}
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 76c2644867aa..b48b23911105 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3571,7 +3571,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
- isMemOpHasNoClobberedMemOperand(Load))
+ !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index e22166d03e9a..c10badba88f3 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1009,7 +1009,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
// occurs before the instruction. Doing it here prevents any additional
// S_WAITCNTs from being emitted if the instruction was marked as
// requiring a WAITCNT beforehand.
- if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) {
+ if (MI.getOpcode() == AMDGPU::S_BARRIER &&
+ !ST->hasAutoWaitcntBeforeBarrier()) {
EmitSwaitcnt |=
ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
EmitSwaitcnt |= ScoreBrackets->updateByWait(
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index 9f32ecfa52ff..bc86515d8b1f 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -630,7 +630,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
- ST->needWaitcntBeforeBarrier()) ||
+ !ST->hasAutoWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG ||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 5b840a14dbc3..73dd8b7daa4e 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
+ !Ld->isVolatile() &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 001fc960b228..77fc9551cff9 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -245,9 +245,10 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
let SubtargetPredicate = Has16BitInsts in {
+def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
+
let isCommutable = 1 in {
-def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
def V_INTERP_P1LL_F16 : VOP3Inst <"v_interp_p1ll_f16", VOP3_Profile<VOP_F32_F32_F16>>;
def V_INTERP_P1LV_F16 : VOP3Inst <"v_interp_p1lv_f16", VOP3_Profile<VOP_F32_F32_F16_F16>>;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46fd1f70ee99..ca68f5d42c32 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -205,6 +205,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+/// Disable +1 predication cost for instructions updating CPSR.
+/// Enabled for Cortex-A57.
+def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
+ "CheapPredicableCPSRDef",
+ "true",
+ "Disable +1 predication cost for instructions updating CPSR">;
+
def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
"AvoidMOVsShifterOperand", "true",
"Avoid movs instructions with shifter operand">;
@@ -788,12 +795,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureCRC,
FeatureFPAO]>;
-def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC,
- FeatureFPAO]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFPAO,
+ FeatureAvoidPartialCPSR,
+ FeatureCheapPredicableCPSR]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDivThumb,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5c9d589e2625..f8b65573f9cd 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -558,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate(
return Found;
}
-static bool isCPSRDefined(const MachineInstr *MI) {
- for (const auto &MO : MI->operands())
+bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
+ for (const auto &MO : MI.operands())
if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
+bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Offset = MI.getOperand(Op + 1);
+ return Offset.getReg() != 0;
+}
+
+// Load with negative register offset requires additional 1cyc and +I unit
+// for Cortex A57
+bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Offset = MI.getOperand(Op + 1);
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ assert(Opc.isImm());
+ assert(Offset.isReg());
+ int64_t OpcImm = Opc.getImm();
+
+ bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
+ return (isSub && Offset.getReg() != 0);
+}
+
+bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ unsigned OffImm = Opc.getImm();
+ return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
+}
+
+// Load, scaled register offset, not plus LSL2
+bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ unsigned OffImm = Opc.getImm();
+
+ bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
+ bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
+ return !SimpleScaled;
+}
+
+// Minus reg for ldstso addr mode
+bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
+ unsigned Op) const {
+ unsigned OffImm = MI.getOperand(Op + 2).getImm();
+ return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+}
+
+// Load, scaled register offset
+bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
+ unsigned Op) const {
+ unsigned OffImm = MI.getOperand(Op + 2).getImm();
+ return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
+}
+
static bool isEligibleForITBlock(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return true;
@@ -590,7 +645,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
case ARM::tSUBi3: // SUB (immediate) T1
case ARM::tSUBi8: // SUB (immediate) T2
case ARM::tSUBrr: // SUB (register) T1
- return !isCPSRDefined(MI);
+ return !ARMBaseInstrInfo::isCPSRDefined(*MI);
}
}
@@ -3349,6 +3404,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
+bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
+ unsigned BaseReg = MI.getOperand(0).getReg();
+ for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
+ const auto &Op = MI.getOperand(i);
+ if (Op.isReg() && Op.getReg() == BaseReg)
+ return true;
+ }
+ return false;
+}
+unsigned
+ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
+ // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops
+ // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
+ return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
+}
+
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -4119,7 +4190,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
+ !Subtarget.cheapPredicableCPSRDef())) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
return 1;
@@ -4148,7 +4220,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &MCID = MI.getDesc();
- if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
+ if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
+ !Subtarget.cheapPredicableCPSRDef()))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index dd7fe871345a..c52e572786d4 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -159,6 +159,24 @@ public:
bool isPredicable(const MachineInstr &MI) const override;
+ // CPSR defined in instruction
+ static bool isCPSRDefined(const MachineInstr &MI);
+ bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const;
+ bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const;
+
+ // Load, scaled register offset
+ bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const;
+ // Load, scaled register offset, not plus LSL2
+ bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const;
+ // Minus reg for ldstso addr mode
+ bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const;
+ // Scaled register offset in address mode 2
+ bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const;
+ // Load multiple, base reg in list
+ bool isLDMBaseRegInList(const MachineInstr &MI) const;
+ // get LDM variable defs size
+ unsigned getLDMVariableDefsSize(const MachineInstr &MI) const;
+
/// GetInstSize - Returns the size of the specified MachineInstr.
///
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 31a2f499a9a7..a33d025d114e 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -34,7 +34,7 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- if (T->isArrayTy())
+ if (T->isArrayTy() || T->isStructTy())
return true;
EVT VT = TLI.getValueType(DL, T, true);
@@ -167,8 +167,11 @@ void ARMCallLowering::splitToValueTypes(
if (SplitVTs.size() == 1) {
// Even if there is no splitting to do, we still want to replace the
// original type (e.g. pointer type -> integer).
- SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags, OrigArg.IsFixed);
+ auto Flags = OrigArg.Flags;
+ unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
+ Flags.setOrigAlign(OriginalAlignment);
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags,
+ OrigArg.IsFixed);
return;
}
@@ -177,6 +180,10 @@ void ARMCallLowering::splitToValueTypes(
EVT SplitVT = SplitVTs[i];
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
auto Flags = OrigArg.Flags;
+
+ unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
bool NeedsConsecutiveRegisters =
TLI.functionArgumentNeedsConsecutiveRegisters(
SplitTy, F->getCallingConv(), F->isVarArg());
@@ -185,6 +192,7 @@ void ARMCallLowering::splitToValueTypes(
if (i == e - 1)
Flags.setInConsecutiveRegsLast();
}
+
SplitArgs.push_back(
ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
SplitTy, Flags, OrigArg.IsFixed});
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index ec5b97cba8cd..1c7902520f2d 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -147,6 +147,9 @@ def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
(void)TII;
+ const ARMSubtarget *STI =
+ static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
+ (void)STI;
}]>;
def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
@@ -420,3 +423,4 @@ include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
+include "ARMScheduleA57.td"
diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td
new file mode 100644
index 000000000000..525079d12d51
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA57.td
@@ -0,0 +1,1471 @@
+//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with the
+// much wider out-of-order issue stage, this produced a need to carefully
+// schedule micro-ops so that all three decoded each cycle are successfully
+// issued as the reservation station(s) simply don't stay occupied for long.
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
+def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
+def IsCPSRDefinedAndPredicatedPred :
+ SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;
+
+// Cortex A57 rev. r1p0 or later (false = r0px)
+def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
+
+// If Addrmode3 contains register offset (not immediate)
+def IsLdrAm3RegOffPred :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3RegOffPredX2 :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
+def IsLdrAm3RegOffPredX3 :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;
+
+// If Addrmode3 contains "minus register"
+def IsLdrAm3NegRegOffPred :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3NegRegOffPredX2 :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
+def IsLdrAm3NegRegOffPredX3 :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
+
+// Load, scaled register offset, not plus LSL2
+def IsLdstsoScaledNotOptimalPredX0 :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
+def IsLdstsoScaledNotOptimalPred :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
+def IsLdstsoScaledNotOptimalPredX2 :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdstsoScaledPred :
+ SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
+def IsLdstsoScaledPredX2 :
+ SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;
+
+def IsLdstsoMinusRegPredX0 :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
+def IsLdstsoMinusRegPred :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
+def IsLdstsoMinusRegPredX2 :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdrAm2ScaledPred :
+ SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;
+
+// LDM, base reg in list
+def IsLdmBaseRegInList :
+ SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
+
+class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
+ list <SchedWriteRes> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
+
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// (AArch64SchedA57.td)
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 3; // 3-way decode and dispatch
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling.
+ let LoopMicroOpBufferSize = 16;
+ let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1>; // Type B micro-ops
+def A57UnitI : ProcResource<2>; // Type I micro-ops
+def A57UnitM : ProcResource<1>; // Type M micro-ops
+def A57UnitL : ProcResource<1>; // Type L micro-ops
+def A57UnitS : ProcResource<1>; // Type S micro-ops
+
+def A57UnitX : ProcResource<1>; // Type X micro-ops (F1)
+def A57UnitW : ProcResource<1>; // Type W micro-ops (F0)
+
+let SchedModel = CortexA57Model in {
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "ARMScheduleA57WriteRes.td"
+
+// To have "CompleteModel = 1", support of pseudos and special instructions
+def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
+ "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
+ "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
+ "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
+ "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
+ "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG",
+ "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>;
+
+def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
+
+// Specific memory instrs
+def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
+ "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
+
+// coprocessor moves
+def : InstRW<[WriteNoop, WriteNoop], (instregex
+ "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
+ "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
+ "(t2)?MSR(banked|i|_AR|_M)?$")>;
+
+// Deprecated instructions
+def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
+
+// Pseudos
+def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
+ "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
+ "tLDRpci_pic", "t2SUBS_PC_LR",
+ "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
+ "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+ "WIN__CHKSTK", "WIN__DBZCHK")>;
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
+
+// --- 3.2 Branch Instructions ---
+// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
+
+def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
+ "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
+def : InstRW<[A57Write_1cyc_1B_1I],
+ (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
+// Pseudos
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
+def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
+ "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
+def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
+
+// --- 3.3 Arithmetic and Logical Instructions ---
+// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
+// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
+
+def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+
+// shift by register, conditional or unconditional
+// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
+// Why more complex instruction uses more simple pipeline?
+// May be an error in doc.
+def A57WriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57ReadALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>
+]>;
+def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsr, A57WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
+def : SchedAlias<ReadALUsr, A57ReadALUsr>;
+
+def A57WriteCMPsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : SchedAlias<WriteCMP, A57Write_1cyc_1I>;
+def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
+def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
+
+// --- 3.4 Move and Shift Instructions ---
+// Move, basic
+// MOV{S}, MOVW, MVN{S}
+def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
+ "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
+ "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
+
+// Move, shift by immed, setflags/no setflags
+// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
+// setflags = isCPSRDefined
+def A57WriteMOVsi : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
+ "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
+ "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
+
+// shift by register, conditional or unconditional, setflags/no setflags
+def A57WriteMOVsr : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
+ "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
+ "(t2|t)RORrr")>;
+
+// Move, top
+// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
+def A57WriteMOVT : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
+
+def A57WriteI2pc :
+ WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
+def A57WriteI2ld :
+ WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
+def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+// +2cyc for branch forms
+def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
+
+// --- 3.5 Divide and Multiply Instructions ---
+// Divide: SDIV, UDIV
+// latency from documentration: 4 ­‐ 20, maximum taken
+def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
+// Multiply: tMul not bound to common WriteRes types
+def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
+def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
+def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
+def : ReadAdvance<ReadMUL, 0>;
+
+// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
+// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
+// Multiply-accumulate pipelines support late-forwarding of accumulate operands
+// from similar μops, allowing a typical sequence of multiply-accumulate μops
+// to issue one every 1 cycle (sched advance = 2).
+def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
+
+def : SchedAlias<WriteMAC16, A57WriteMLA>;
+def : SchedAlias<WriteMAC32, A57WriteMLA>;
+def : SchedAlias<ReadMAC, A57ReadMLA>;
+
+def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
+def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;
+
+// Multiply long: SMULL, UMULL
+def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
+def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
+
+// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
+// Parallel arith
+// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
+// Conditional GE-setting instructions require three extra μops
+// and two additional cycles to conditionally update the GE field.
+def A57WriteParArith : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]>
+]>;
+def : InstRW< [A57WriteParArith], (instregex
+ "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
+ "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;
+
+// Parallel arith with exchange: SASX, SSAX, UASX, USAX
+def A57WriteParArithExch : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]>
+]>;
+def : InstRW<[A57WriteParArithExch],
+ (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;
+
+// Parallel halving arith
+// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
+def : InstRW<[A57Write_2cyc_1M], (instregex
+ "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
+ "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;
+
+// Parallel halving arith with exchange
+// SHASX, SHSAX, UHASX, UHSAX
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
+ "(t2)?UHASX", "(t2)?UHSAX")>;
+
+// Parallel saturating arith
+// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
+def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
+ "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;
+
+// Parallel saturating arith with exchange
+// QASX, QSAX, UQASX, UQSAX
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
+ "(t2)?UQASX", "(t2)?UQSAX")>;
+
+// Saturate: SSAT, SSAT16, USAT, USAT16
+def : InstRW<[A57Write_2cyc_1M],
+ (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;
+
+// Saturating arith: QADD, QSUB
+def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;
+
+// Saturating doubling arith: QDADD, QDSUB
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
+
+// --- 3.7 Miscellaneous Data-Processing Instructions ---
+// Bit field extract: SBFX, UBFX
+def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;
+
+// Bit field insert/clear: BFI, BFC
+def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;
+
+// Select bytes, conditional/unconditional
+def A57WriteSEL : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;
+
+// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
+def : InstRW<[A57Write_1cyc_1I],
+ (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;
+
+// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
+def : InstRW<[A57Write_2cyc_1M],
+ (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;
+
+// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
+def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;
+
+// Sum of absolute differences: USAD8, USADA8
+def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
+
+// --- 3.8 Load Instructions ---
+
+// Load, immed offset
+// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
+def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
+ "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
+ "PICLDR", "tLDR")>;
+
+def : InstRW<[A57Write_4cyc_1L],
+ (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
+
+// For "Load, register offset, minus" we need +1cyc, +1I
+def A57WriteLdrAm3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
+def A57WriteLdrAm3X2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
+def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
+
+def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
+
+def A57WrBackOne : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def A57WrBackTwo : SchedWriteRes<[]> {
+ let Latency = 2;
+ let NumMicroOps = 0;
+}
+def A57WrBackThree : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+
+// --- LDR pre-indexed ---
+// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
+ "LDRB_PRE_IMM", "t2LDRB_PRE")>;
+
+// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
+// (5 cyc load result for not-lsl2 scaled)
+def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
+ (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;
+
+def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
+ (instregex "LDR(H|SH|SB)_PRE")>;
+def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
+ (instregex "t2LDR(H|SH|SB)?_PRE")>;
+
+// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
+def A57WriteLdrDAm3Pre : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
+ (instregex "LDRD_PRE")>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
+ (instregex "t2LDRD_PRE")>;
+
+// --- LDR post-indexed ---
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
+ "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;
+
+def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
+ (instregex "LDR(H|SH|SB)_POST")>;
+def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
+ (instregex "t2LDR(H|SH|SB)?_POST")>;
+
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
+ "LDRB_POST_REG", "LDR(B?)T_POST$")>;
+
+def A57WriteLdrTRegPost : SchedWriteVariant<[
+ SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
+ SchedVar<NoSchedPred, [A57WrBackTwo]>
+]>;
+// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
+def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
+ (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;
+
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
+
+def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
+ (instregex "t2LDRD_POST")>;
+
+// --- Preload instructions ---
+// Preload, immed offset
+def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
+ "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;
+
+// Preload, register offset,
+// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
+// otherwise 4cyc "L"
+def A57WritePLD : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
+ SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
+
+// --- Load multiple instructions ---
+foreach NumAddr = 1-8 in {
+ def A57LMAddrPred#NumAddr :
+ SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
+}
+
+def A57LDMOpsListNoregin : A57WriteLMOpsListType<
+ [A57Write_3cyc_1L, A57Write_3cyc_1L,
+ A57Write_4cyc_1L, A57Write_4cyc_1L,
+ A57Write_5cyc_1L, A57Write_5cyc_1L,
+ A57Write_6cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_7cyc_1L,
+ A57Write_8cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_9cyc_1L,
+ A57Write_10cyc_1L, A57Write_10cyc_1L]>;
+def A57WriteLDMnoreginlist : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57LDMOpsListRegin : A57WriteLMOpsListType<
+ [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
+def A57WriteLDMreginlist : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57LDMOpsList_Upd : A57WriteLMOpsListType<
+ [A57WrBackOne,
+ A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
+ A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
+def A57WriteLDM_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
+ SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
+]> { let Variadic=1; }
+
+def A57WriteLDM : SchedWriteVariant<[
+ SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
+ SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
+
+// TODO: no writeback latency defined in documentation (implemented as 1 cyc)
+def : InstRW<[A57WriteLDM_Upd],
+ (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
+
+// --- 3.9 Store Instructions ---
+
+// Store, immed offset
+def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
+ "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;
+
+// Store, register offset
+// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S",
+// otherwise 1cyc S.
+def A57WriteStrAmLDSTSO : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;
+
+// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
+def A57WriteStrAm3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;
+def A57WriteStrAm3X2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
+
+// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM",
+ "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
+ "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;
+
+// Store, register pre-indexed:
+// 1(1) "S, I0/I1" for plus reg
+// 3(2) "I0/I1, S" for minus reg
+// 1(2) "S, M" for scaled plus lsl2
+// 3(2) "I0/I1, S" for other scaled
+def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>,
+ SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
+ (instregex "STR_PRE_REG", "STRB_PRE_REG")>;
+
+// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
+// 1(1) "S, I0/I1" for imm or reg plus
+// 3(2) "I0/I1, S" for reg minus
+def A57WriteStrAm3PreX2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
+ (instregex "STRH_PRE")>;
+
+def A57WriteStrAm3PreX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
+ (instregex "STRD_PRE")>;
+
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM",
+ "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>;
+
+// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
+def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG",
+ "STRB(T?)_POST_REG", "STR(B?)T_POST$")>;
+
+// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
+// 1(1) "S, I0/I1" both for reg or imm
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
+ (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>;
+
+// --- Store multiple instructions ---
+// TODO: no writeback latency defined in documentation
+def A57WriteSTM : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
+]>;
+def A57WriteSTM_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+
+def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
+def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
+ (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;
+
+// --- 3.10 FP Data Processing Instructions ---
+def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;
+
+// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
+def A57WriteVcmp : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
+ SchedVar<NoSchedPred, [A57Write_3cyc_1X]>
+]>;
+def : InstRW<[A57WriteVcmp],
+ (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;
+
+// fp convert
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
+
+def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
+
+// FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;
+
+// FP divide, FP square root
+def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>;
+def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>;
+def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
+def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
+
+// FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
+
+// FP multiply-accumulate pipelines support late forwarding of the result
+// from FP multiply μops to the accumulate operands of an
+// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
+// after the FP multiply μop has been issued
+// FP multiply, FZ
+def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+
+def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
+def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
+def : ReadAdvance<ReadFPMUL, 0>;
+
+// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
+// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
+def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+
+// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
+// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
+// Currently, there is no way to define different read advances for VFMA operand
+// from VFMA or from VMUL, so there will be 5 read advance.
+// Zero latency (instead of one) for VMUL->VFMA shouldn't break something.
+// The same situation with ASIMD VMUL/VFMA instructions
+// def A57ReadVFMA : SchedRead;
+// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
+// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
+def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;
+
+def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
+def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
+def : SchedAlias<ReadFPMAC, A57ReadVFMA5>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
+
+// --- 3.11 FP Miscellaneous Instructions ---
+// VMOV: 3cyc "F0/F1" for imm/reg
+def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
+
+// 5cyc L for FP transfer, vfp to core reg,
+// 5cyc L for FP transfer, core reg to vfp
+def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
+// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2).
+def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>;
+
+// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
+def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>;
+
+// --- 3.12 FP Load Instructions ---
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>;
+
+// FP load multiple (VLDM)
+
+def A57VLDMOpsListUncond : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L, A57Write_5cyc_1L,
+ A57Write_6cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_7cyc_1L,
+ A57Write_8cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_9cyc_1L,
+ A57Write_10cyc_1L, A57Write_10cyc_1L,
+ A57Write_11cyc_1L, A57Write_11cyc_1L,
+ A57Write_12cyc_1L, A57Write_12cyc_1L]>;
+def A57WriteVLDMuncond : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57VLDMOpsListCond : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_10cyc_1L,
+ A57Write_11cyc_1L, A57Write_12cyc_1L,
+ A57Write_13cyc_1L, A57Write_14cyc_1L,
+ A57Write_15cyc_1L, A57Write_16cyc_1L,
+ A57Write_17cyc_1L, A57Write_18cyc_1L,
+ A57Write_19cyc_1L, A57Write_20cyc_1L]>;
+def A57WriteVLDMcond : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57WriteVLDM : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
+ SchedVar<NoSchedPred, [A57WriteVLDMuncond]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>;
+
+def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
+ A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;
+def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
+ A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
+ A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
+ A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
+ A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;
+def A57WriteVLDMcond_UPD : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57WriteVLDM_UPD : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
+ SchedVar<NoSchedPred, [A57WriteVLDMuncond_UPD]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
+ (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
+
+// --- 3.13 FP Store Instructions ---
+def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>;
+
+def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>;
+
+def A57WriteVSTMs : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
+]>;
+def A57WriteVSTMd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1S]>
+]>;
+def A57WriteVSTMs_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+def A57WriteVSTMd_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+
+def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>;
+def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>;
+def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
+ (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
+def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
+ (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
+
+// --- 3.14 ASIMD Integer Instructions ---
+
+// ASIMD absolute diff, 3cyc F0/F1 for integer VABD
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>;
+
+// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
+def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
+def : InstRW<[A57WriteVABAD, A57ReadVABAD],
+ (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;
+def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
+def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
+ (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;
+
+// ASIMD absolute diff accum long: 4(1) F1 for VABAL
+def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
+def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>;
+
+// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>;
+
+// ASIMD arith, basic
+def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW",
+ "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
+ "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>;
+
+// ASIMD arith, complex
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB",
+ "VQABS", "VQADD", "VQNEG", "VQSUB",
+ "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;
+
+// ASIMD compare
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;
+
+// ASIMD logical
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;
+
+// ASIMD max/min
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
+
+// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
+// and multiply-with-accumulate instructions relative to r0pX.
+def A57WriteVMULD_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULD_VecInt], (instregex
+ "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)",
+ "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>;
+
+// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
+def A57WriteVMULQ_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def : InstRW<[A57WriteVMULQ_VecInt], (instregex
+ "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)",
+ "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate, D-form
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAD_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAD_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
+ (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>;
+
+// ASIMD multiply accumulate, Q-form
+// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def A57ReadVMLAQ_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
+ (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAL_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
+ (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
+
+// ASIMD multiply accumulate saturating long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (3 or 2 ReadAdvance)
+def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
+ (instregex "VQDMLAL", "VQDMLSL")>;
+
+// ASIMD multiply long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+def A57WriteVMULL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULL_VecInt],
+ (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
+
+// ASIMD pairwise add and accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
+def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>;
+
+// ASIMD shift accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
+def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[A57Write_3cyc_1X],
+ (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)",
+ "VRSHRN")>;
+
+// ASIMD shift by immed and insert, basic, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by immed and insert, basic, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+ "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, basic, D-form
+def : InstRW<[A57Write_3cyc_1X], (instregex
+ "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+// VQRSHL, VQSHL, VRSHL
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
+ "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+ "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
+ "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// --- 3.15 ASIMD Floating-Point Instructions ---
+// ASIMD FP absolute value
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>;
+
+// ASIMD FP arith
+def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)",
+ "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>;
+
+// ASIMD FP compare
+def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)",
+ "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;
+
+// ASIMD FP convert, integer
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
+ "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
+ "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;
+
+// ASIMD FP convert, half-precision: 8cyc F0/F1
+def : InstRW<[A57Write_8cyc_1V], (instregex
+ "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
+ "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
+ "VCVT(f2h|h2f)")>;
+
+// ASIMD FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+
+// ASIMD FP multiply
+def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;
+
+// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
+def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadVMLA_VecFP :
+ SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
+def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
+ (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>;
+
+// ASIMD FP negate
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>;
+
+// ASIMD FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
+
+// --- 3.16 ASIMD Miscellaneous Instructions ---
+
+// ASIMD bitwise insert
+def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
+
+// ASIMD count
+def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
+
+// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>;
+
+// ASIMD duplicate, scalar: 3cyc "F0/F1"
+def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>;
+
+// ASIMD extract
+def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>;
+
+// ASIMD move, immed
+def : InstRW<[A57Write_3cyc_1V], (instregex
+ "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
+ "VMOVQ0")>;
+
+// ASIMD move, narrowing
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>;
+
+// ASIMD reciprocal estimate
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>;
+
+// ASIMD reciprocal step, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>;
+
+// ASIMD reverse, swap, table lookup (1-2 reg)
+def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;
+
+// ASIMD table lookup (3-4 reg)
+def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>;
+
+// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
+def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>;
+
+// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>;
+
+// ASIMD transpose
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>;
+
+// ASIMD unzip/zip, D-form
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
+ (instregex "VUZPd", "VZIPd")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
+ (instregex "VUZPq", "VZIPq")>;
+
+// --- 3.17 ASIMD Load Instructions ---
+
+// Overriden via InstRW for this processor.
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
+ (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+
+// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_6cyc_1L],
+ (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>;
+
+def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
+ (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;
+
+// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex
+ "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex
+ "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V],
+ (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+
+// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2b(8|16|32)wb")>;
+
+// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+ "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+// 2 results + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
+ (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
+ "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
+// 3 results
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+ (instregex "VLD3(d|q)(8|16|32)$")>;
+// 1 result
+def : InstRW<[A57Write_9cyc_1L_1V],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+// 3 results + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD3LN(d|q)32$",
+ "VLD3LN(d|q)32Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)32_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)32Pseudo_UPD")>;
+
+// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+ (instregex "VLD3LN(d|q)(8|16)$",
+ "VLD3LN(d|q)(8|16)Pseudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)(8|16)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD3DUP(d|q)(8|16|32)$",
+ "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
+ A57Write_9cyc_1L_1V],
+ (instregex "VLD4(d|q)(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
+ A57Write_8cyc_1L_1V],
+ (instregex "VLD4LN(d|q)32$",
+ "VLD4LN(d|q)32Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4LN(d|q)32_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4LN(d|q)32Pseudo_UPD")>;
+
+// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
+ A57Write_9cyc_1L_1V],
+ (instregex "VLD4LN(d|q)(8|16)$",
+ "VLD4LN(d|q)(8|16)Pseudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4LN(d|q)(8|16)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;
+
+// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
+ A57Write_8cyc_1L_1V],
+ (instregex "VLD4DUP(d|q)(8|16|32)$",
+ "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;
+
+// --- 3.18 ASIMD Store Instructions ---
+
+// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
+def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>;
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)wb")>;
+// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
+def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>;
+def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
+ (instregex "VST1q(8|16|32|64)wb")>;
+// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
+def : InstRW<[A57Write_3cyc_1S],
+ (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
+// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
+def : InstRW<[A57Write_4cyc_1S],
+ (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
+// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST2(d|b)(8|16|32)$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST2(b|d)(8|16|32)wb")>;
+// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
+def : InstRW<[A57Write_4cyc_1S_1V],
+ (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
+ (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
+// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST2LN(d|q)(8|16|32)_UPD",
+ "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 3 element, multiple, 3 reg
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST3(d|q)(8|16|32)_UPD",
+ "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+// ASIMD store, 3 element, one lane
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST3LN(d|q)(8|16|32)_UPD",
+ "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 4 element, multiple, 4 reg
+def : InstRW<[A57Write_4cyc_1S_1V],
+ (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
+ (instregex "VST4(d|q)(8|16|32)_UPD",
+ "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+// ASIMD store, 4 element, one lane
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST4LN(d|q)(8|16|32)_UPD",
+ "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// --- 3.19 Cryptography Extensions ---
+// Crypto AES ops
+// AESD, AESE, AESIMC, AESMC: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>;
+// Crypto SHA1 xor ops: 6cyc F0/F1
+def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
+// Crypto SHA1 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+// Crypto SHA1 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+// Crypto SHA256 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+// Crypto SHA256 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+
+// --- 3.20 CRC ---
+def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
+
+// -----------------------------------------------------------------------------
+// Common definitions
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+def : SchedAlias<WriteALU, A57Write_1cyc_1I>;
+
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
+def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
+def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;
+
+def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : ReadAdvance<ReadALU, 0>;
+
+} // SchedModel = CortexA57Model
+
diff --git a/lib/Target/ARM/ARMScheduleA57WriteRes.td b/lib/Target/ARM/ARMScheduleA57WriteRes.td
new file mode 100644
index 000000000000..670717dc7c13
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -0,0 +1,323 @@
+//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming conventions is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued as follows: one to I pipe, six to S pipes and
+// four to V pipes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
+ let ResourceCycles = [17]; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
+ let ResourceCycles = [18]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+ let ResourceCycles = [19]; }
+def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; }
+def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
+def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+ let ResourceCycles = [35]; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+
+// A57Write_3cyc_1L - A57Write_20cyc_1L
+foreach Lat = 3-20 in {
+ def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> {
+ let Latency = Lat;
+ }
+}
+
+// A57Write_4cyc_1S - A57Write_16cyc_1S
+foreach Lat = 4-16 in {
+ def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> {
+ let Latency = Lat;
+ }
+}
+
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; }
+def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; }
+def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; }
+def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+ let ResourceCycles = [32, 32];
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS,
+ A57UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 36;
+ let NumMicroOps = 2;
+ let ResourceCycles = [18, 18];
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I
+foreach Lat = 3-20 in {
+ def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> {
+ let Latency = Lat; let NumMicroOps = 2;
+ }
+}
+
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I
+foreach Lat = 4-16 in {
+ def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> {
+ let Latency = Lat; let NumMicroOps = 2;
+ }
+}
+
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index d2630685d91b..af682dd8321c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -234,6 +234,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate = false;
+ /// CheapPredicableCPSRDef - If true, disable +1 predication cost
+ /// for instructions updating CPSR. Enabled for Cortex-A57.
+ bool CheapPredicableCPSRDef = false;
+
/// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
/// movs with shifter operand (i.e. asr, lsl, lsr).
bool AvoidMOVsShifterOperand = false;
@@ -543,6 +547,7 @@ public:
bool nonpipelinedVFP() const { return NonpipelinedVFP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRetAddrStack() const { return HasRetAddrStack; }
bool hasMPExtension() const { return HasMPExtension; }
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 0fef91ec4d3e..b76da727237c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -3419,9 +3419,7 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
if (NewOpcode >= 0)
return NewOpcode;
-
- dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n';
- llvm_unreachable(nullptr);
+ return 0;
}
int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 4593fc92ca6f..35948e36ad91 100644
--- a/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -135,6 +135,14 @@ private:
// returns true on success.
static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry);
+ // Attempts to reduce LBU/LHU instruction into LBU16/LHU16,
+ // returns true on success.
+ static bool ReduceLXUtoLXU16(MachineInstr *MI, const ReduceEntry &Entry);
+
+ // Attempts to reduce SB/SH instruction into SB16/SH16,
+ // returns true on success.
+ static bool ReduceSXtoSX16(MachineInstr *MI, const ReduceEntry &Entry);
+
// Attempts to reduce arithmetic instructions, returns true on success
static bool ReduceArithmeticInstructions(MachineInstr *MI,
const ReduceEntry &Entry);
@@ -162,10 +170,26 @@ llvm::SmallVector<ReduceEntry, 16> MicroMipsSizeReduce::ReduceTable = {
{RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM),
ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
ImmField(0, 0, 0, -1)},
+ {RT_OneInstr, OpCodes(Mips::LBu, Mips::LBU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)},
+ {RT_OneInstr, OpCodes(Mips::LBu_MM, Mips::LBU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)},
+ {RT_OneInstr, OpCodes(Mips::LHu, Mips::LHU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::LHu_MM, Mips::LHU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
{RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+ {RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::SH, Mips::SH16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::SH_MM, Mips::SH16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM),
ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
ImmField(0, 0, 0, -1)},
@@ -193,6 +217,13 @@ static bool isMMThreeBitGPRegister(const MachineOperand &MO) {
return false;
}
+// Returns true if the machine operand MO is register $0, $17, or $2-$7.
+static bool isMMSourceRegister(const MachineOperand &MO) {
+ if (MO.isReg() && Mips::GPRMM16ZeroRegClass.contains(MO.getReg()))
+ return true;
+ return false;
+}
+
// Returns true if the operand Op is an immediate value
// and writes the immediate value into variable Imm
static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) {
@@ -279,6 +310,32 @@ bool MicroMipsSizeReduce::ReduceArithmeticInstructions(
return ReplaceInstruction(MI, Entry);
}
+bool MicroMipsSizeReduce::ReduceLXUtoLXU16(MachineInstr *MI,
+ const ReduceEntry &Entry) {
+
+ if (!ImmInRange(MI, Entry))
+ return false;
+
+ if (!isMMThreeBitGPRegister(MI->getOperand(0)) ||
+ !isMMThreeBitGPRegister(MI->getOperand(1)))
+ return false;
+
+ return ReplaceInstruction(MI, Entry);
+}
+
+bool MicroMipsSizeReduce::ReduceSXtoSX16(MachineInstr *MI,
+ const ReduceEntry &Entry) {
+
+ if (!ImmInRange(MI, Entry))
+ return false;
+
+ if (!isMMSourceRegister(MI->getOperand(0)) ||
+ !isMMThreeBitGPRegister(MI->getOperand(1)))
+ return false;
+
+ return ReplaceInstruction(MI, Entry);
+}
+
bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 54619589c341..35a67134775a 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -88,6 +88,3 @@ pr45695.c wasm-o
pr49279.c wasm-o
pr49390.c wasm-o
pr52286.c wasm-o
-
-# fatal error: error in backend: data symbols must have a size set with .size
-921110-1.c wasm-o
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0a41f35f9320..5303d7a406ad 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4753,7 +4753,7 @@ static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
int NumElts = Mask.size();
- ScaledMask.assign(NumElts * Scale, -1);
+ ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
@@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::SCALAR_TO_VECTOR: {
- // Match against a scalar_to_vector of an extract from a similar vector.
+ // Match against a scalar_to_vector of an extract from a vector,
+ // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
SDValue N0 = N.getOperand(0);
- if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- N0.getOperand(0).getValueType() != VT ||
- !isa<ConstantSDNode>(N0.getOperand(1)) ||
- NumElts <= N0.getConstantOperandVal(1) ||
- !N->isOnlyUserOf(N0.getNode()))
+ SDValue SrcExtract;
+
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getValueType() == VT) {
+ SrcExtract = N0;
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
+ }
+
+ if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
+ NumElts <= SrcExtract.getConstantOperandVal(1))
return false;
- Ops.push_back(N0.getOperand(0));
- Mask.push_back(N0.getConstantOperandVal(1));
- Mask.append(NumElts - 1, SM_SentinelUndef);
+
+ SDValue SrcVec = SrcExtract.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+
+ Ops.push_back(SrcVec);
+ Mask.push_back(SrcExtract.getConstantOperandVal(1));
+ Mask.append(NumZeros, SM_SentinelZero);
+ Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
}
case X86ISD::PINSRB:
@@ -6542,12 +6564,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
- assert((ScalarSize == 32 || ScalarSize == 64) &&
- "Unsupported floating point scalar size");
- if (ScalarSize == 32)
- Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
- else
- Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
+ if (ScalarSize == 32) {
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+ } else {
+ assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+ }
} else
Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
ConstantVec.push_back(Const);
@@ -6633,11 +6655,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// AVX have support for 32 and 64 bit broadcast for floats only.
// No 64bit integer in 32bit subtarget.
MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
- Constant *C = SplatBitSize == 32
- ? ConstantFP::get(Type::getFloatTy(*Ctx),
- SplatValue.bitsToFloat())
- : ConstantFP::get(Type::getDoubleTy(*Ctx),
- SplatValue.bitsToDouble());
+ // Lower the splat via APFloat directly, to avoid any conversion.
+ Constant *C =
+ SplatBitSize == 32
+ ? ConstantFP::get(*Ctx,
+ APFloat(APFloat::IEEEsingle(), SplatValue))
+ : ConstantFP::get(*Ctx,
+ APFloat(APFloat::IEEEdouble(), SplatValue));
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
@@ -8003,7 +8027,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
- int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
@@ -16997,7 +17021,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
@@ -17024,18 +17048,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
// available.
SDValue Cmp;
- unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
+ unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
if (SSECC == 8) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
- if (SetCCOpcode == ISD::SETUEQ) {
+ if (Cond == ISD::SETUEQ) {
CC0 = 3; // UNORD
CC1 = 0; // EQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) :
static_cast<unsigned>(ISD::OR);
} else {
- assert(SetCCOpcode == ISD::SETONE);
+ assert(Cond == ISD::SETONE);
CC0 = 7; // ORD
CC1 = 4; // NEQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) :
@@ -17082,7 +17106,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// 2. The original operand type has been promoted to a 256-bit vector.
//
// Note that condition 2. only applies for AVX targets.
- SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode);
+ SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond);
return DAG.getZExtOrTrunc(NewOp, dl, VT);
}
@@ -17122,7 +17146,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
- switch (SetCCOpcode) {
+ switch (Cond) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
@@ -17137,60 +17161,49 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
// Are we comparing unsigned or signed integers?
- unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
- ? X86ISD::VPCOMU : X86ISD::VPCOM;
+ unsigned Opc =
+ ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CmpMode, dl, MVT::i8));
}
- // We are handling one of the integer comparisons here. Since SSE only has
+ // We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc;
- bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
- bool Subus = false;
-
- switch (SetCCOpcode) {
- default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
- case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
- case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
- case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETLE: Opc = X86ISD::PCMPGT;
- Invert = true; break;
- case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETUGT: Opc = X86ISD::PCMPGT;
- FlipSigns = true; break;
- case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETULE: Opc = X86ISD::PCMPGT;
- FlipSigns = true; Invert = true; break;
- }
+ unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
+ : X86ISD::PCMPGT;
+ bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
+ Cond == ISD::SETGE || Cond == ISD::SETUGE;
+ bool Invert = Cond == ISD::SETNE ||
+ (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
+ bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
- bool hasMinMax =
- (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
- || (Subtarget.hasSSE2() && (VET == MVT::i8));
-
- if (hasMinMax) {
- switch (SetCCOpcode) {
+ bool HasMinMax =
+ (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
+ (Subtarget.hasSSE2() && (VET == MVT::i8));
+ bool MinMax = false;
+ if (HasMinMax) {
+ switch (Cond) {
default: break;
case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
- if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
+ if (MinMax)
+ Swap = Invert = FlipSigns = false;
}
- bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
- if (!MinMax && hasSubus) {
+ bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+ bool Subus = false;
+ if (!MinMax && HasSubus) {
// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
// Op0 u<= Op1:
// t = psubus Op0, Op1
// pcmpeq t, <0..0>
- switch (SetCCOpcode) {
+ switch (Cond) {
default: break;
case ISD::SETULT: {
// If the comparison is against a constant we can turn this into a
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 613b4a7f03e9..626a891f65c6 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -228,7 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
SmallVector<ReturnInst *, 4> Returns;
- CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns);
+ CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns);
// Remove old returns.
for (ReturnInst *Return : Returns)
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index ea48043f9381..44e1f9b404ed 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) {
size_t FinalSuspendIndex = 0;
clear(*this);
SmallVector<CoroFrameInst *, 8> CoroFrames;
+ SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
+
for (Instruction &I : instructions(F)) {
if (auto II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
@@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) {
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
break;
+ case Intrinsic::coro_save:
+ // After optimizations, coro_suspends using this coro_save might have
+ // been removed, remember orphaned coro_saves to remove them later.
+ if (II->use_empty())
+ UnusedCoroSaves.push_back(cast<CoroSaveInst>(II));
+ break;
case Intrinsic::coro_suspend:
CoroSuspends.push_back(cast<CoroSuspendInst>(II));
if (CoroSuspends.back()->isFinal()) {
@@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) {
if (HasFinalSuspend &&
FinalSuspendIndex != CoroSuspends.size() - 1)
std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back());
+
+ // Remove orphaned coro.saves.
+ for (CoroSaveInst *CoroSave : UnusedCoroSaves)
+ CoroSave->eraseFromParent();
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 28cc81c76d4f..5cc29a493798 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1188,6 +1188,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
+ // Skip it if the SCC only contains optnone functions.
+ if (SCCNodes.empty())
+ return Changed;
+
Changed |= addArgumentReturnedAttrs(SCCNodes);
Changed |= addReadAttrs(SCCNodes, AARGetter);
Changed |= addArgumentAttrs(SCCNodes);
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 231487923fad..6d34ab8b0d96 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -292,8 +292,7 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
- const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -301,7 +300,7 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
- if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ if (!Index.isGlobalValueLive(GVSummary.second)) {
DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
continue;
}
@@ -344,15 +343,14 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> *DeadSymbols) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists, DeadSymbols);
+ &ExportLists);
}
// When computing imports we added all GUIDs referenced by anything
@@ -414,82 +412,71 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
-DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
- const ModuleSummaryIndex &Index,
+void llvm::computeDeadSymbols(
+ ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ assert(!Index.withGlobalValueDeadStripping());
if (!ComputeDead)
- return DenseSet<GlobalValue::GUID>();
+ return;
if (GUIDPreservedSymbols.empty())
// Don't do anything when nothing is live, this is friendly with tests.
- return DenseSet<GlobalValue::GUID>();
- DenseSet<ValueInfo> LiveSymbols;
+ return;
+ unsigned LiveSymbols = 0;
SmallVector<ValueInfo, 128> Worklist;
Worklist.reserve(GUIDPreservedSymbols.size() * 2);
for (auto GUID : GUIDPreservedSymbols) {
ValueInfo VI = Index.getValueInfo(GUID);
if (!VI)
continue;
- DEBUG(dbgs() << "Live root: " << VI.getGUID() << "\n");
- LiveSymbols.insert(VI);
- Worklist.push_back(VI);
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
}
+
// Add values flagged in the index as live roots to the worklist.
- for (const auto &Entry : Index) {
- bool IsLiveRoot = llvm::any_of(
- Entry.second.SummaryList,
- [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
- return Summary->liveRoot();
- });
- if (!IsLiveRoot)
- continue;
- DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
- Worklist.push_back(ValueInfo(&Entry));
- }
+ for (const auto &Entry : Index)
+ for (auto &S : Entry.second.SummaryList)
+ if (S->isLive()) {
+ DEBUG(dbgs() << "Live root: " << Entry.first << "\n");
+ Worklist.push_back(ValueInfo(&Entry));
+ ++LiveSymbols;
+ break;
+ }
+
+ // Make value live and add it to the worklist if it was not live before.
+ // FIXME: we should only make the prevailing copy live here
+ auto visit = [&](ValueInfo VI) {
+ for (auto &S : VI.getSummaryList())
+ if (S->isLive())
+ return;
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
+ ++LiveSymbols;
+ Worklist.push_back(VI);
+ };
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
-
- // FIXME: we should only make the prevailing copy live here
for (auto &Summary : VI.getSummaryList()) {
- for (auto Ref : Summary->refs()) {
- if (LiveSymbols.insert(Ref).second) {
- DEBUG(dbgs() << "Marking live (ref): " << Ref.getGUID() << "\n");
- Worklist.push_back(Ref);
- }
- }
- if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
- for (auto Call : FS->calls()) {
- if (LiveSymbols.insert(Call.first).second) {
- DEBUG(dbgs() << "Marking live (call): " << Call.first.getGUID()
- << "\n");
- Worklist.push_back(Call.first);
- }
- }
- }
+ for (auto Ref : Summary->refs())
+ visit(Ref);
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
+ for (auto Call : FS->calls())
+ visit(Call.first);
if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
auto AliaseeGUID = AS->getAliasee().getOriginalName();
ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID);
- if (AliaseeVI && LiveSymbols.insert(AliaseeVI).second) {
- DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
- Worklist.push_back(AliaseeVI);
- }
+ if (AliaseeVI)
+ visit(AliaseeVI);
}
}
}
- DenseSet<GlobalValue::GUID> DeadSymbols;
- DeadSymbols.reserve(
- std::min(Index.size(), Index.size() - LiveSymbols.size()));
- for (auto &Entry : Index) {
- if (!LiveSymbols.count(ValueInfo(&Entry))) {
- DEBUG(dbgs() << "Marking dead: " << Entry.first << "\n");
- DeadSymbols.insert(Entry.first);
- }
- }
- DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
- << DeadSymbols.size() << " symbols Dead \n");
- NumDeadSymbols += DeadSymbols.size();
- NumLiveSymbols += LiveSymbols.size();
- return DeadSymbols;
+ Index.setWithGlobalValueDeadStripping();
+
+ unsigned DeadSymbols = Index.size() - LiveSymbols;
+ DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
+ << " symbols Dead \n");
+ NumDeadSymbols += DeadSymbols;
+ NumLiveSymbols += LiveSymbols;
}
/// Compute the set of summaries needed for a ThinLTO backend compilation of
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index ca4ee92f971a..7bec50d9d25f 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1442,9 +1442,8 @@ bool LowerTypeTestsModule::lower() {
for (auto &P : *ExportSummary) {
for (auto &S : P.second.SummaryList) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
- if (!FS)
+ if (!FS || !ExportSummary->isGlobalValueLive(FS))
continue;
- // FIXME: Only add live functions.
for (GlobalValue::GUID G : FS->type_tests())
for (Metadata *MD : MetadataByGUID[G])
AddTypeIdUse(MD).IsExported = true;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index bc0967448cdd..ea805efc66b7 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -68,6 +68,10 @@ static cl::opt<int>
cl::desc("Relative frequency of outline region to "
"the entry block"));
+static cl::opt<unsigned> ExtraOutliningPenalty(
+ "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
+ cl::desc("A debug option to add additional penalty to the computed one."));
+
namespace {
struct FunctionOutliningInfo {
@@ -83,7 +87,7 @@ struct FunctionOutliningInfo {
SmallVector<BasicBlock *, 4> Entries;
// The return block that is not included in the outlined region.
BasicBlock *ReturnBlock;
- // The dominating block of the region ot be outlined.
+ // The dominating block of the region to be outlined.
BasicBlock *NonReturnBlock;
// The set of blocks in Entries that that are predecessors to ReturnBlock
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
@@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
if (hasProfileData(F, OI))
return OutlineRegionRelFreq;
- // When profile data is not available, we need to be very
- // conservative in estimating the overall savings. We need to make sure
- // the outline region relative frequency is not below the threshold
- // specified by the option.
- OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
+ // When profile data is not available, we need to be conservative in
+ // estimating the overall savings. Static branch prediction can usually
+ // guess the branch direction right (taken/non-taken), but the guessed
+ // branch probability is usually not biased enough. In case when the
+ // outlined region is predicted to be likely, its probability needs
+ // to be made higher (more biased) to not under-estimate the cost of
+ // function outlining. On the other hand, if the outlined region
+ // is predicted to be less likely, the predicted probablity is usually
+ // higher than the actual. For instance, the actual probability of the
+ // less likely target is only 5%, but the guessed probablity can be
+ // 40%. In the latter case, there is no need for further adjustement.
+ // FIXME: add an option for this.
+ if (OutlineRegionRelFreq < BranchProbability(45, 100))
+ return OutlineRegionRelFreq;
+
+ OutlineRegionRelFreq = std::max(
+ OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
return OutlineRegionRelFreq;
}
@@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::Alloca:
+ continue;
+ case Instruction::GetElementPtr:
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ continue;
+ default:
+ break;
+ }
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(I)) {
InlineCost += getCallsiteCost(CallSite(CI), DL);
continue;
@@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
BasicBlock *OutliningCallBB) {
// First compute the cost of the outlined region 'OI' in the original
- // function 'F':
+ // function 'F'.
+ // FIXME: The code extractor (outliner) can now do code sinking/hoisting
+ // to reduce outlining cost. The hoisted/sunk code currently do not
+ // incur any runtime cost so it is still OK to compare the outlined
+ // function cost with the outlined region in the original function.
+ // If this ever changes, we will need to introduce new extractor api
+ // to pass the information.
int OutlinedRegionCost = 0;
for (BasicBlock &BB : *F) {
if (&BB != OI->ReturnBlock &&
@@ -542,8 +584,14 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
assert(OutlinedFunctionCost >= OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
- int OutliningRuntimeOverhead =
- OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
+ // The code extractor introduces a new root and exit stub blocks with
+ // additional unconditional branches. Those branches will be eliminated
+ // later with bb layout. The cost should be adjusted accordingly:
+ OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
+
+ int OutliningRuntimeOverhead = OutliningFuncCallCost +
+ (OutlinedFunctionCost - OutlinedRegionCost) +
+ ExtraOutliningPenalty;
return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
OutlinedRegionCost);
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9fd3a9021a27..16fba32e9805 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -157,7 +157,7 @@ static cl::opt<bool>
static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN sinking pass (default = on)"));
+ cl::desc("Enable the GVN sinking pass (default = off)"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2c2b7317a1c0..c0798e164c39 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4508,13 +4508,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateAnd(A, B),
Op1);
- // ~x < ~y --> y < x
- // ~x < cst --> ~cst < x
+ // ~X < ~Y --> Y < X
+ // ~X < C --> X > ~C
if (match(Op0, m_Not(m_Value(A)))) {
if (match(Op1, m_Not(m_Value(B))))
return new ICmpInst(I.getPredicate(), B, A);
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
- return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+
+ const APInt *C;
+ if (match(Op1, m_APInt(C)))
+ return new ICmpInst(I.getSwappedPredicate(), A,
+ ConstantInt::get(Op1->getType(), ~(*C)));
}
Instruction *AddI = nullptr;
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ff753c20a94a..df4ee9969c02 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2087,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
switch (I.getNumArgOperands()) {
case 3:
assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
+ LLVM_FALLTHROUGH;
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 325b64cd8b43..8aa40d1759de 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -57,6 +57,11 @@ static const char *const SanCovTracePCGuardName =
"__sanitizer_cov_trace_pc_guard";
static const char *const SanCovTracePCGuardInitName =
"__sanitizer_cov_trace_pc_guard_init";
+static const char *const SanCov8bitCountersInitName =
+ "__sanitizer_cov_8bit_counters_init";
+
+static const char *const SanCovGuardsSectionName = "sancov_guards";
+static const char *const SanCovCountersSectionName = "sancov_counters";
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
@@ -64,14 +69,18 @@ static cl::opt<int> ClCoverageLevel(
"3: all blocks and critical edges"),
cl::Hidden, cl::init(0));
-static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
- cl::desc("Experimental pc tracing"),
- cl::Hidden, cl::init(false));
+static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
+ cl::desc("Experimental pc tracing"), cl::Hidden,
+ cl::init(false));
static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
cl::desc("pc tracing with a guard"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters",
+ cl::desc("increments 8-bit counter for every edge"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool>
ClCMPTracing("sanitizer-coverage-trace-compares",
cl::desc("Tracing of CMP and similar instructions"),
@@ -123,9 +132,10 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.TraceCmp |= ClCMPTracing;
Options.TraceDiv |= ClDIVTracing;
Options.TraceGep |= ClGEPTracing;
- Options.TracePC |= ClExperimentalTracePC;
+ Options.TracePC |= ClTracePC;
Options.TracePCGuard |= ClTracePCGuard;
- if (!Options.TracePCGuard && !Options.TracePC)
+ Options.Inline8bitCounters |= ClInline8bitCounters;
+ if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters)
Options.TracePCGuard = true; // TracePCGuard is default.
Options.NoPrune |= !ClPruneBlocks;
return Options;
@@ -159,11 +169,22 @@ private:
void InjectTraceForSwitch(Function &F,
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
- void CreateFunctionGuardArray(size_t NumGuards, Function &F);
+ GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+ Function &F, Type *Ty,
+ const char *Section);
+ void CreateFunctionLocalArrays(size_t NumGuards, Function &F);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx);
- StringRef getSanCovTracePCGuardSection() const;
- StringRef getSanCovTracePCGuardSectionStart() const;
- StringRef getSanCovTracePCGuardSectionEnd() const;
+ void CreateInitCallForSection(Module &M, const char *InitFunctionName,
+ Type *Ty, const std::string &Section);
+
+ void SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+ }
+
+ std::string getSectionName(const std::string &Section) const;
+ std::string getSectionStart(const std::string &Section) const;
+ std::string getSectionEnd(const std::string &Section) const;
Function *SanCovTracePCIndir;
Function *SanCovTracePC, *SanCovTracePCGuard;
Function *SanCovTraceCmpFunction[4];
@@ -171,20 +192,48 @@ private:
Function *SanCovTraceGepFunction;
Function *SanCovTraceSwitchFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy;
+ Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+ *Int8Ty, *Int8PtrTy;
Module *CurModule;
Triple TargetTriple;
LLVMContext *C;
const DataLayout *DL;
GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
- bool HasSancovGuardsSection;
+ GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters.
SanitizerCoverageOptions Options;
};
} // namespace
+void SanitizerCoverageModule::CreateInitCallForSection(
+ Module &M, const char *InitFunctionName, Type *Ty,
+ const std::string &Section) {
+ IRBuilder<> IRB(M.getContext());
+ Function *CtorFunc;
+ GlobalVariable *SecStart =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSectionStart(Section));
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionEnd(Section));
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty},
+ {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)});
+
+ if (TargetTriple.supportsCOMDAT()) {
+ // Use comdat to dedup CtorFunc.
+ CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+ } else {
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ }
+}
+
bool SanitizerCoverageModule::runOnModule(Module &M) {
if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
return false;
@@ -192,15 +241,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
DL = &M.getDataLayout();
CurModule = &M;
TargetTriple = Triple(M.getTargetTriple());
- HasSancovGuardsSection = false;
+ FunctionGuardArray = nullptr;
+ Function8bitCounterArray = nullptr;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Int64Ty = IRB.getInt64Ty();
Int32Ty = IRB.getInt32Ty();
+ Int8Ty = IRB.getInt8Ty();
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
@@ -243,34 +295,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
for (auto &F : M)
runOnFunction(F);
- // Create variable for module (compilation unit) name
- if (Options.TracePCGuard) {
- if (HasSancovGuardsSection) {
- Function *CtorFunc;
- GlobalVariable *SecStart = new GlobalVariable(
- M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- getSanCovTracePCGuardSectionStart());
- SecStart->setVisibility(GlobalValue::HiddenVisibility);
- GlobalVariable *SecEnd = new GlobalVariable(
- M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- getSanCovTracePCGuardSectionEnd());
- SecEnd->setVisibility(GlobalValue::HiddenVisibility);
-
- std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, SanCovTracePCGuardInitName,
- {Int32PtrTy, Int32PtrTy},
- {IRB.CreatePointerCast(SecStart, Int32PtrTy),
- IRB.CreatePointerCast(SecEnd, Int32PtrTy)});
-
- if (TargetTriple.supportsCOMDAT()) {
- // Use comdat to dedup CtorFunc.
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
- } else {
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
- }
- }
- }
+ if (FunctionGuardArray)
+ CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy,
+ SanCovGuardsSectionName);
+ if (Function8bitCounterArray)
+ CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy,
+ SanCovCountersSectionName);
+
return true;
}
@@ -393,17 +424,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
InjectTraceForGep(F, GepTraceTargets);
return true;
}
-void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards,
- Function &F) {
- if (!Options.TracePCGuard) return;
- HasSancovGuardsSection = true;
- ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards);
- FunctionGuardArray = new GlobalVariable(
- *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage,
- Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_");
+
+GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
+ size_t NumElements, Function &F, Type *Ty, const char *Section) {
+ ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+ auto Array = new GlobalVariable(
+ *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayTy), "__sancov_gen_");
if (auto Comdat = F.getComdat())
- FunctionGuardArray->setComdat(Comdat);
- FunctionGuardArray->setSection(getSanCovTracePCGuardSection());
+ Array->setComdat(Comdat);
+ Array->setSection(getSectionName(Section));
+ return Array;
+}
+void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards,
+ Function &F) {
+ if (Options.TracePCGuard)
+ FunctionGuardArray = CreateFunctionLocalArrayInSection(
+ NumGuards, F, Int32Ty, SanCovGuardsSectionName);
+ if (Options.Inline8bitCounters)
+ Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+ NumGuards, F, Int8Ty, SanCovCountersSectionName);
}
bool SanitizerCoverageModule::InjectCoverage(Function &F,
@@ -413,11 +453,11 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F,
case SanitizerCoverageOptions::SCK_None:
return false;
case SanitizerCoverageOptions::SCK_Function:
- CreateFunctionGuardArray(1, F);
+ CreateFunctionLocalArrays(1, F);
InjectCoverageAtBlock(F, F.getEntryBlock(), 0);
return true;
default: {
- CreateFunctionGuardArray(AllBlocks.size(), F);
+ CreateFunctionLocalArrays(AllBlocks.size(), F);
for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
InjectCoverageAtBlock(F, *AllBlocks[i], i);
return true;
@@ -436,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
- assert(Options.TracePC || Options.TracePCGuard);
+ assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters);
for (auto I : IndirCalls) {
IRBuilder<> IRB(I);
CallSite CS(I);
@@ -564,8 +604,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
if (Options.TracePC) {
IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- } else {
- assert(Options.TracePCGuard);
+ }
+ if (Options.TracePCGuard) {
auto GuardPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
ConstantInt::get(IntptrTy, Idx * 4)),
@@ -573,26 +613,39 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
}
+ if (Options.Inline8bitCounters) {
+ auto CounterPtr = IRB.CreateGEP(
+ Function8bitCounterArray,
+ {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+ auto Load = IRB.CreateLoad(CounterPtr);
+ auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+ auto Store = IRB.CreateStore(Inc, CounterPtr);
+ SetNoSanitizeMetadata(Load);
+ SetNoSanitizeMetadata(Store);
+ }
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const {
+std::string
+SanitizerCoverageModule::getSectionName(const std::string &Section) const {
if (TargetTriple.getObjectFormat() == Triple::COFF)
return ".SCOV$M";
if (TargetTriple.isOSBinFormatMachO())
- return "__DATA,__sancov_guards";
- return "__sancov_guards";
+ return "__DATA,__" + Section;
+ return "__" + Section;
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const {
+std::string
+SanitizerCoverageModule::getSectionStart(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
- return "\1section$start$__DATA$__sancov_guards";
- return "__start___sancov_guards";
+ return "\1section$start$__DATA$__" + Section;
+ return "__start___" + Section;
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const {
+std::string
+SanitizerCoverageModule::getSectionEnd(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
- return "\1section$end$__DATA$__sancov_guards";
- return "__stop___sancov_guards";
+ return "\1section$end$__DATA$__" + Section;
+ return "__stop___" + Section;
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 3953198fe605..9a7882211bac 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1823,6 +1823,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
// An IV counter must preserve its type.
if (IncI->getNumOperands() == 2)
break;
+ LLVM_FALLTHROUGH;
default:
return nullptr;
}
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 930696b036c0..7d8da9b453f9 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -83,6 +84,149 @@ static bool handleSwitchExpect(SwitchInst &SI) {
return true;
}
+/// Handler for PHINodes that define the value argument to an
+/// @llvm.expect call.
+///
+/// If the operand of the phi has a constant value and it 'contradicts'
+/// with the expected value of phi def, then the corresponding incoming
+/// edge of the phi is unlikely to be taken. Using that information,
+/// the branch probability info for the originating branch can be inferred.
+static void handlePhiDef(CallInst *Expect) {
+ Value &Arg = *Expect->getArgOperand(0);
+ ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1));
+ const APInt &ExpectedPhiValue = ExpectedValue->getValue();
+
+ // Walk up in backward a list of instructions that
+ // have 'copy' semantics by 'stripping' the copies
+ // until a PHI node or an instruction of unknown kind
+ // is reached. Negation via xor is also handled.
+ //
+ // C = PHI(...);
+ // B = C;
+ // A = B;
+ // D = __builtin_expect(A, 0);
+ //
+ Value *V = &Arg;
+ SmallVector<Instruction *, 4> Operations;
+ while (!isa<PHINode>(V)) {
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(V)) {
+ V = ZExt->getOperand(0);
+ Operations.push_back(ZExt);
+ continue;
+ }
+
+ if (SExtInst *SExt = dyn_cast<SExtInst>(V)) {
+ V = SExt->getOperand(0);
+ Operations.push_back(SExt);
+ continue;
+ }
+
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Xor)
+ return;
+
+ ConstantInt *CInt = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (!CInt)
+ return;
+
+ V = BinOp->getOperand(0);
+ Operations.push_back(BinOp);
+ }
+
+ // Executes the recorded operations on input 'Value'.
+ auto ApplyOperations = [&](const APInt &Value) {
+ APInt Result = Value;
+ for (auto Op : llvm::reverse(Operations)) {
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ Result ^= cast<ConstantInt>(Op->getOperand(1))->getValue();
+ break;
+ case Instruction::ZExt:
+ Result = Result.zext(Op->getType()->getIntegerBitWidth());
+ break;
+ case Instruction::SExt:
+ Result = Result.sext(Op->getType()->getIntegerBitWidth());
+ break;
+ default:
+ llvm_unreachable("Unexpected operation");
+ }
+ }
+ return Result;
+ };
+
+ auto *PhiDef = dyn_cast<PHINode>(V);
+
+ // Get the first dominating conditional branch of the operand
+ // i's incoming block.
+ auto GetDomConditional = [&](unsigned i) -> BranchInst * {
+ BasicBlock *BB = PhiDef->getIncomingBlock(i);
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (BI && BI->isConditional())
+ return BI;
+ BB = BB->getSinglePredecessor();
+ if (!BB)
+ return nullptr;
+ BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return nullptr;
+ return BI;
+ };
+
+ // Now walk through all Phi operands to find phi oprerands with values
+ // conflicting with the expected phi output value. Any such operand
+ // indicates the incoming edge to that operand is unlikely.
+ for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) {
+
+ Value *PhiOpnd = PhiDef->getIncomingValue(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
+ if (!CI)
+ continue;
+
+ // Not an interesting case when IsUnlikely is false -- we can not infer
+ // anything useful when the operand value matches the expected phi
+ // output.
+ if (ExpectedPhiValue == ApplyOperations(CI->getValue()))
+ continue;
+
+ BranchInst *BI = GetDomConditional(i);
+ if (!BI)
+ continue;
+
+ MDBuilder MDB(PhiDef->getContext());
+
+ // There are two situations in which an operand of the PhiDef comes
+ // from a given successor of a branch instruction BI.
+ // 1) When the incoming block of the operand is the successor block;
+ // 2) When the incoming block is BI's enclosing block and the
+ // successor is the PhiDef's enclosing block.
+ //
+ // Returns true if the operand which comes from OpndIncomingBB
+ // comes from outgoing edge of BI that leads to Succ block.
+ auto *OpndIncomingBB = PhiDef->getIncomingBlock(i);
+ auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) {
+ if (OpndIncomingBB == Succ)
+ // If this successor is the incoming block for this
+ // Phi operand, then this successor does lead to the Phi.
+ return true;
+ if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent())
+ // Otherwise, if the edge is directly from the branch
+ // to the Phi, this successor is the one feeding this
+ // Phi operand.
+ return true;
+ return false;
+ };
+
+ if (IsOpndComingFromSuccessor(BI->getSuccessor(1)))
+ BI->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight));
+ else if (IsOpndComingFromSuccessor(BI->getSuccessor(0)))
+ BI->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight));
+ }
+}
+
// Handle both BranchInst and SelectInst.
template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
@@ -99,25 +243,31 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
CmpInst::Predicate Predicate;
- uint64_t ValueComparedTo = 0;
+ ConstantInt *CmpConstOperand = nullptr;
if (!CmpI) {
CI = dyn_cast<CallInst>(BSI.getCondition());
Predicate = CmpInst::ICMP_NE;
- ValueComparedTo = 0;
} else {
Predicate = CmpI->getPredicate();
if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ)
return false;
- ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+
+ CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
if (!CmpConstOperand)
return false;
- ValueComparedTo = CmpConstOperand->getZExtValue();
CI = dyn_cast<CallInst>(CmpI->getOperand(0));
}
if (!CI)
return false;
+ uint64_t ValueComparedTo = 0;
+ if (CmpConstOperand) {
+ if (CmpConstOperand->getBitWidth() > 64)
+ return false;
+ ValueComparedTo = CmpConstOperand->getZExtValue();
+ }
+
Function *Fn = CI->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
return false;
@@ -181,6 +331,10 @@ static bool lowerExpectIntrinsic(Function &F) {
Function *Fn = CI->getCalledFunction();
if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ // Before erasing the llvm.expect, walk backward to find
+ // phi that define llvm.expect's first arg, and
+ // infer branch probability:
+ handlePhiDef(CI);
Value *Exp = CI->getArgOperand(0);
CI->replaceAllUsesWith(Exp);
CI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 77b2bd84f9b6..350b50ffcdd4 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Rewrite an existing set of gc.statepoints such that they make potential
-// relocations performed by the garbage collector explicit in the IR.
+// Rewrite call/invoke instructions so as to make potential relocations
+// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//
@@ -2094,9 +2094,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// live in the IR. We'll remove all of these when done.
SmallVector<CallInst *, 64> Holders;
- // Insert a dummy call with all of the arguments to the vm_state we'll need
- // for the actual safepoint insertion. This ensures reference arguments in
- // the deopt argument list are considered live through the safepoint (and
+ // Insert a dummy call with all of the deopt operands we'll need for the
+ // actual safepoint insertion as arguments. This ensures reference operands
+ // in the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6e113bccff94..fb1b5813fd79 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3698,7 +3698,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+ auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
// Either lookup a split load or create one.
LoadInst *PLoad;
@@ -3709,7 +3710,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, LoadBasePtr->getName() + "."),
+ LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
@@ -3719,7 +3720,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, StoreBasePtr->getName() + "."),
+ StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1ec3d0d49637..1c1a75c111e9 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -37,10 +37,10 @@
using namespace llvm;
/// See comments in Cloning.h.
-BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
- ValueToValueMapTy &VMap,
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
- ClonedCodeInfo *CodeInfo) {
+ ClonedCodeInfo *CodeInfo,
+ DebugInfoFinder *DIFinder) {
DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
@@ -50,10 +50,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
// Loop over all instructions, and copy them over.
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
+
+ if (DIFinder && F->getParent() && II->getDebugLoc())
+ DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get());
+
Instruction *NewInst = II->clone();
- if (F && F->getSubprogram())
- DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(),
- F->getSubprogram(), Cache);
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
@@ -122,31 +123,38 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
OldAttrs.getRetAttributes(), NewArgAttrs));
+ bool MustCloneSP =
+ OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
+ DISubprogram *SP = OldFunc->getSubprogram();
+ if (SP) {
+ assert(!MustCloneSP || ModuleLevelChanges);
+ // Add mappings for some DebugInfo nodes that we don't want duplicated
+ // even if they're distinct.
+ auto &MD = VMap.MD();
+ MD[SP->getUnit()].reset(SP->getUnit());
+ MD[SP->getType()].reset(SP->getType());
+ MD[SP->getFile()].reset(SP->getFile());
+ // If we're not cloning into the same module, no need to clone the
+ // subprogram
+ if (!MustCloneSP)
+ MD[SP].reset(SP);
+ }
+
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
for (auto MD : MDs) {
- MDNode *NewMD;
- bool MustCloneSP =
- (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() &&
- OldFunc->getParent() == NewFunc->getParent());
- if (MustCloneSP) {
- auto *SP = cast<DISubprogram>(MD.second);
- NewMD = DISubprogram::getDistinct(
- NewFunc->getContext(), SP->getScope(), SP->getName(),
- SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(),
- SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(),
- SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(),
- SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(),
- SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(),
- SP->getVariables(), SP->getThrownTypes());
- } else
- NewMD =
- MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
- NewFunc->addMetadata(MD.first, *NewMD);
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
}
+ // When we remap instructions, we want to avoid duplicating inlined
+ // DISubprograms, so record all subprograms we find as we duplicate
+ // instructions and then freeze them in the MD map.
+ DebugInfoFinder DIFinder;
+
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
@@ -156,7 +164,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
+ SP ? &DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
@@ -178,6 +187,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Returns.push_back(RI);
}
+ for (DISubprogram *ISP : DIFinder.subprograms()) {
+ if (ISP != SP) {
+ VMap.MD()[ISP].reset(ISP);
+ }
+ }
+
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
@@ -226,7 +241,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "",
+ CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
CodeInfo);
return NewF;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8b9a64c220cc..799eef21dc4e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4779,6 +4779,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
scalarizeInstruction(&I, true);
break;
}
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -7396,6 +7397,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// likely.
return Cost / getReciprocalPredBlockProb();
}
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6f78e6b94a3..d1349535f298 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -259,6 +259,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
return (CI->getArgOperand(1) == Scalar);
}
+ LLVM_FALLTHROUGH;
}
default:
return false;
@@ -4749,56 +4750,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
-namespace {
-/// Tracks instructons and its children.
-class WeakTrackingVHWithLevel final : public CallbackVH {
- /// Operand index of the instruction currently beeing analized.
- unsigned Level = 0;
- /// Is this the instruction that should be vectorized, or are we now
- /// processing children (i.e. operands of this instruction) for potential
- /// vectorization?
- bool IsInitial = true;
-
-public:
- explicit WeakTrackingVHWithLevel() = default;
- WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
- /// Restart children analysis each time it is repaced by the new instruction.
- void allUsesReplacedWith(Value *New) override {
- setValPtr(New);
- Level = 0;
- IsInitial = true;
- }
- /// Check if the instruction was not deleted during vectorization.
- bool isValid() const { return !getValPtr(); }
- /// Is the istruction itself must be vectorized?
- bool isInitial() const { return IsInitial; }
- /// Try to vectorize children.
- void clearInitial() { IsInitial = false; }
- /// Are all children processed already?
- bool isFinal() const {
- assert(getValPtr() &&
- (isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() >= Level));
- return getValPtr() &&
- cast<Instruction>(getValPtr())->getNumOperands() == Level;
- }
- /// Get next child operation.
- Value *nextOperand() {
- assert(getValPtr() && isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() > Level);
- return cast<Instruction>(getValPtr())->getOperand(Level++);
- }
- virtual ~WeakTrackingVHWithLevel() = default;
-};
-} // namespace
-
-/// \brief Attempt to reduce a horizontal reduction.
-/// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators Root in a basic block BB, then check
-/// if it can be done.
-/// \returns true if a horizontal reduction was matched and reduced.
-/// \returns false if a horizontal reduction was not matched.
-static bool canBeVectorized(
+/// Attempt to reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding the phi node \a P
+/// with reduction operators \a Root (or one of its operands) in a basic block
+/// \a BB, then check if it can be done. If horizontal reduction is not found
+/// and root instruction is a binary operation, vectorization of the operands is
+/// attempted.
+/// \returns true if a horizontal reduction was matched and reduced or operands
+/// of one of the binary instruction were vectorized.
+/// \returns false if a horizontal reduction was not matched (or not possible)
+/// or no vectorization of any binary operation feeding \a Root instruction was
+/// performed.
+static bool tryToVectorizeHorReductionOrInstOperands(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI,
const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
@@ -4810,56 +4773,62 @@ static bool canBeVectorized(
if (Root->getParent() != BB)
return false;
- SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
+ // Start analysis starting from Root instruction. If horizontal reduction is
+ // found, try to vectorize it. If it is not a horizontal reduction or
+ // vectorization is not possible or not effective, and currently analyzed
+ // instruction is a binary operation, try to vectorize the operands, using
+ // pre-order DFS traversal order. If the operands were not vectorized, repeat
+ // the same procedure considering each operand as a possible root of the
+ // horizontal reduction.
+ // Interrupt the process if the Root instruction itself was vectorized or all
+ // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized.
+ SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
SmallSet<Value *, 8> VisitedInstrs;
bool Res = false;
while (!Stack.empty()) {
- Value *V = Stack.back();
- if (!V) {
- Stack.pop_back();
+ Value *V;
+ unsigned Level;
+ std::tie(V, Level) = Stack.pop_back_val();
+ if (!V)
continue;
- }
auto *Inst = dyn_cast<Instruction>(V);
- if (!Inst || isa<PHINode>(Inst)) {
- Stack.pop_back();
+ if (!Inst || isa<PHINode>(Inst))
continue;
- }
- if (Stack.back().isInitial()) {
- Stack.back().clearInitial();
- if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
- HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, BI)) {
- if (HorRdx.tryToReduce(R, TTI)) {
- Res = true;
- P = nullptr;
- continue;
- }
- }
- if (P) {
- Inst = dyn_cast<Instruction>(BI->getOperand(0));
- if (Inst == P)
- Inst = dyn_cast<Instruction>(BI->getOperand(1));
- if (!Inst) {
- P = nullptr;
- continue;
- }
+ if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
+ HorizontalReduction HorRdx;
+ if (HorRdx.matchAssociativeReduction(P, BI)) {
+ if (HorRdx.tryToReduce(R, TTI)) {
+ Res = true;
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ continue;
}
}
- P = nullptr;
- if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
- Res = true;
- continue;
+ if (P) {
+ Inst = dyn_cast<Instruction>(BI->getOperand(0));
+ if (Inst == P)
+ Inst = dyn_cast<Instruction>(BI->getOperand(1));
+ if (!Inst) {
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ continue;
+ }
}
}
- if (Stack.back().isFinal()) {
- Stack.pop_back();
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ Res = true;
continue;
}
- if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
- if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
- Stack.size() < RecursionMaxDepth)
- Stack.push_back(NextV);
+ // Try to vectorize operands.
+ if (++Level < RecursionMaxDepth)
+ for (auto *Op : Inst->operand_values())
+ Stack.emplace_back(Op, Level);
}
return Res;
}
@@ -4876,10 +4845,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
if (!isa<BinaryOperator>(I))
P = nullptr;
// Try to match and vectorize a horizontal reduction.
- return canBeVectorized(P, I, BB, R, TTI,
- [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
- return tryToVectorize(BI, R);
- });
+ return tryToVectorizeHorReductionOrInstOperands(
+ P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
+ return tryToVectorize(BI, R);
+ });
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {