author     Dimitry Andric <dim@FreeBSD.org>  2017-06-16 21:03:24 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-06-16 21:03:24 +0000
commit     7c7aba6e5fef47a01a136be655b0a92cfd7090f6 (patch)
tree       99ec531924f6078534b100ab9d7696abce848099 /lib
parent     7ab83427af0f77b59941ceba41d509d7d097b065 (diff)
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 22
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 8
-rw-r--r--  lib/Analysis/DivergenceAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/MemorySSA.cpp | 2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 129
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 4
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 5
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 87
-rw-r--r--  lib/Bitcode/Reader/MetadataLoader.cpp | 123
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 52
-rw-r--r--  lib/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 33
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 6
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 2
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 81
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 18
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 10
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 62
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 26
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 15
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 61
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 79
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 3
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 237
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 28
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 49
-rw-r--r--  lib/DebugInfo/CodeView/CMakeLists.txt | 3
-rw-r--r--  lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp | 5
-rw-r--r--  lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp | 2
-rw-r--r--  lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp | 37
-rw-r--r--  lib/DebugInfo/CodeView/StringsAndChecksums.cpp | 55
-rw-r--r--  lib/DebugInfo/CodeView/SymbolDumper.cpp | 8
-rw-r--r--  lib/DebugInfo/CodeView/SymbolRecordMapping.cpp | 4
-rw-r--r--  lib/DebugInfo/CodeView/TypeDatabase.cpp | 71
-rw-r--r--  lib/DebugInfo/CodeView/TypeIndex.cpp | 81
-rw-r--r--  lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp | 2
-rw-r--r--  lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 7
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp | 249
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 23
-rw-r--r--  lib/DebugInfo/DWARF/DWARFVerifier.cpp | 34
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp | 2
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 34
-rw-r--r--  lib/DebugInfo/PDB/Native/InfoStream.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFile.cpp | 10
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 10
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBStringTable.cpp | 3
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/Native/PublicsStream.cpp | 9
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiHashing.cpp | 1
-rw-r--r--  lib/DebugInfo/PDB/UDTLayout.cpp | 5
-rw-r--r--  lib/Fuzzer/FuzzerDriver.cpp | 4
-rw-r--r--  lib/Fuzzer/FuzzerLoop.cpp | 4
-rw-r--r--  lib/Fuzzer/FuzzerTracePC.cpp | 23
-rw-r--r--  lib/Fuzzer/FuzzerTracePC.h | 35
-rw-r--r--  lib/Fuzzer/test/AbsNegAndConstant64Test.cpp | 2
-rw-r--r--  lib/Fuzzer/test/CMakeLists.txt | 3
-rw-r--r--  lib/Fuzzer/test/FourIndependentBranchesTest.cpp | 1
-rw-r--r--  lib/Fuzzer/test/FuzzerUnittest.cpp | 12
-rw-r--r--  lib/Fuzzer/test/ShrinkControlFlowTest.cpp | 1
-rw-r--r--  lib/Fuzzer/test/SimpleHashTest.cpp | 2
-rw-r--r--  lib/Fuzzer/test/SingleStrncmpTest.cpp | 1
-rw-r--r--  lib/Fuzzer/test/TableLookupTest.cpp | 3
-rw-r--r--  lib/Fuzzer/test/fuzzer-dirs.test | 8
-rw-r--r--  lib/Fuzzer/test/inline-8bit-counters.test | 4
-rw-r--r--  lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt | 12
-rw-r--r--  lib/Fuzzer/test/trace-pc/CMakeLists.txt | 3
-rw-r--r--  lib/IR/ConstantFold.cpp | 9
-rw-r--r--  lib/IR/ConstantsContext.h | 18
-rw-r--r--  lib/IR/DebugInfoMetadata.cpp | 28
-rw-r--r--  lib/IR/IRBuilder.cpp | 15
-rw-r--r--  lib/IR/Metadata.cpp | 2
-rw-r--r--  lib/IR/ModuleSummaryIndex.cpp | 13
-rw-r--r--  lib/IR/Verifier.cpp | 39
-rw-r--r--  lib/LLVMBuild.txt | 1
-rw-r--r--  lib/LTO/LTO.cpp | 200
-rw-r--r--  lib/LTO/LTOModule.cpp | 12
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 2
-rw-r--r--  lib/MC/MCSectionELF.cpp | 2
-rw-r--r--  lib/MC/WasmObjectWriter.cpp | 43
-rw-r--r--  lib/Object/ArchiveWriter.cpp | 14
-rw-r--r--  lib/Object/ELF.cpp | 1
-rw-r--r--  lib/Object/IRSymtab.cpp | 6
-rw-r--r--  lib/Object/WindowsResource.cpp | 156
-rw-r--r--  lib/ObjectYAML/COFFYAML.cpp | 11
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLDebugSections.cpp | 338
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 64
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLTypes.cpp | 40
-rw-r--r--  lib/ObjectYAML/ELFYAML.cpp | 1
-rw-r--r--  lib/Option/Arg.cpp | 10
-rw-r--r--  lib/Option/ArgList.cpp | 21
-rw-r--r--  lib/Option/OptTable.cpp | 40
-rw-r--r--  lib/Option/Option.cpp | 10
-rw-r--r--  lib/Passes/PassBuilder.cpp | 6
-rw-r--r--  lib/Support/BinaryStreamWriter.cpp | 4
-rw-r--r--  lib/Support/DebugCounter.cpp | 6
-rw-r--r--  lib/Support/FoldingSet.cpp | 42
-rw-r--r--  lib/Support/ThreadPool.cpp | 19
-rw-r--r--  lib/Support/Unix/Program.inc | 10
-rw-r--r--  lib/TableGen/Record.cpp | 43
-rw-r--r--  lib/TableGen/SetTheory.cpp | 20
-rw-r--r--  lib/Target/AArch64/AArch64.td | 1
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 8
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorDetails.td | 61
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 34
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 10
-rw-r--r--  lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 15
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 3
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 21
-rw-r--r--  lib/Target/AMDGPU/FLATInstructions.td | 77
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 14
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp | 19
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td | 12
-rw-r--r--  lib/Target/ARM/ARMCallLowering.cpp | 55
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 18
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp | 45
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.h | 2
-rw-r--r--  lib/Target/BPF/BPFAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/BPF/BPFISelDAGToDAG.cpp | 240
-rw-r--r--  lib/Target/BPF/BPFInstrInfo.td | 14
-rw-r--r--  lib/Target/Hexagon/HexagonGenMux.cpp | 52
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 3
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 19
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 135
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 9
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 99
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 3
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 176
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 26
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 118
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 22
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 13
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.h | 2
-rw-r--r--  lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h | 4
-rw-r--r--  lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp | 25
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h | 2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 218
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 2
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 12
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 134
-rw-r--r--  lib/Testing/CMakeLists.txt | 1
-rw-r--r--  lib/Testing/LLVMBuild.txt | 19
-rw-r--r--  lib/Testing/Support/CMakeLists.txt | 12
-rw-r--r--  lib/Testing/Support/Error.cpp | 22
-rw-r--r--  lib/Testing/Support/LLVMBuild.txt | 22
-rw-r--r--  lib/Transforms/IPO/CrossDSOCFI.cpp | 11
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 53
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp | 166
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 414
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 6
-rw-r--r--  lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 50
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 113
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 121
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h | 9
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 14
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 353
-rw-r--r--  lib/Transforms/Instrumentation/InstrProfiling.cpp | 12
-rw-r--r--  lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 419
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/GVNSink.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 24
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 71
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 11
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 219
-rw-r--r--  lib/Transforms/Utils/PredicateInfo.cpp | 3
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 14
197 files changed, 4631 insertions(+), 2630 deletions(-)
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f743cb234c45..dbb1b01b94ac 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1011,10 +1011,24 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// equal each other so we can exit early.
if (C1 && C2)
return NoAlias;
- if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
- GEP2->getOperand(GEP2->getNumOperands() - 1),
- DL))
- return NoAlias;
+ {
+ Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1);
+ Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1);
+ if (isa<PHINode>(GEP1LastIdx) || isa<PHINode>(GEP2LastIdx)) {
+ // If one of the indices is a PHI node, be safe and only use
+ // computeKnownBits so we don't make any assumptions about the
+ // relationships between the two indices. This is important if we're
+ // asking about values from different loop iterations. See PR32314.
+ // TODO: We may be able to change the check so we only do this when
+ // we definitely looked through a PHINode.
+ KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL);
+ KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL);
+ if (Known1.Zero.intersects(Known2.One) ||
+ Known1.One.intersects(Known2.Zero))
+ return NoAlias;
+ } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
+ return NoAlias;
+ }
return MayAlias;
} else if (!LastIndexedStruct || !C1 || !C2) {
return MayAlias;
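The KnownBits test added above proves inequality without assuming anything about which loop iteration each index value comes from: if some bit is known zero in one index and known one in the other, the two values must differ. A minimal sketch of the predicate, assuming only llvm/Support/KnownBits.h (the surrounding GEP logic is omitted):

#include "llvm/Support/KnownBits.h"

// If A has a bit known to be 0 where B has that bit known to be 1 (or
// vice versa), then A != B for every pair of concrete values, e.g.
// A known even (bit 0 zero) against B known odd (bit 0 one).
static bool knownBitsProveNonEqual(const llvm::KnownBits &A,
                                   const llvm::KnownBits &B) {
  return A.Zero.intersects(B.One) || A.One.intersects(B.Zero);
}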
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 5896e6e0902f..facda246936d 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -608,18 +608,18 @@ namespace {
}
bool runOnSCC(CallGraphSCC &SCC) override {
+ bool BannerPrinted = false;
auto PrintBannerOnce = [&] () {
- static bool BannerPrinted = false;
if (BannerPrinted)
return;
Out << Banner;
BannerPrinted = true;
};
for (CallGraphNode *CGN : SCC) {
- if (CGN->getFunction()) {
- if (isFunctionInPrintList(CGN->getFunction()->getName())) {
+ if (Function *F = CGN->getFunction()) {
+ if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) {
PrintBannerOnce();
- CGN->getFunction()->print(Out);
+ F->print(Out);
}
} else if (llvm::isFunctionInPrintList("*")) {
PrintBannerOnce();
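The fix above is subtle: a static local inside a lambda is initialized once per program, not once per enclosing call, so the old code printed the banner at most once across all SCCs; hoisting the flag into runOnSCC makes it per-invocation again (the change also skips declarations, which have no body to print). A standalone sketch of the bug class, illustrative rather than LLVM code:

#include <cstdio>

void runOnce() {
  auto printBannerOnce = [] {
    static bool BannerPrinted = false; // shared by every runOnce() call
    if (BannerPrinted)
      return;
    std::puts("banner");
    BannerPrinted = true;
  };
  printBannerOnce();
}

int main() {
  runOnce(); // prints "banner"
  runOnce(); // prints nothing: the static survived the first call
}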
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 1b36569f7a07..2d39a0b02150 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -241,7 +241,7 @@ void DivergencePropagator::exploreDataDependency(Value *V) {
// Follow def-use chains of V.
for (User *U : V->users()) {
Instruction *UserInst = cast<Instruction>(U);
- if (DV.insert(UserInst).second)
+ if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
Worklist.push_back(UserInst);
}
}
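The new isAlwaysUniform hook (added to TargetTransformInfo below) lets a target cut divergence propagation at values it guarantees to be uniform. AMDGPU, for example, can use it for readfirstlane-style intrinsics whose result is the same in every lane, where marking users divergent would be needlessly conservative.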
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index e0e04a91410f..86d0d92799f2 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -1872,7 +1872,6 @@ MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
}
bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
@@ -1957,6 +1956,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
#ifdef EXPENSIVE_CHECKS
MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
assert(NewNoCache == New && "Cache made us hand back a different result?");
+ (void)NewNoCache;
#endif
if (AutoResetWalker)
resetClobberWalker();
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index b9c4716b5528..aebc80a0a885 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -149,9 +149,9 @@ static cl::opt<unsigned> MaxValueCompareDepth(
cl::init(2));
static cl::opt<unsigned>
- MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden,
- cl::desc("Maximum depth of recursive AddExpr"),
- cl::init(32));
+ MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive arithmetics"),
+ cl::init(32));
static cl::opt<unsigned> MaxConstantEvolvingDepth(
"scalar-evolution-max-constant-evolving-depth", cl::Hidden,
@@ -2276,8 +2276,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 1) return Ops[0];
}
- // Limit recursion calls depth
- if (Depth > MaxAddExprDepth)
+ // Limit recursion calls depth.
+ if (Depth > MaxArithDepth)
return getOrCreateAddExpr(Ops, Flags);
// Okay, check to see if the same value occurs in the operand list more than
@@ -2293,7 +2293,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
++Count;
// Merge the values into a multiply.
const SCEV *Scale = getConstant(Ty, Count);
- const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == Count)
return Mul;
Ops[i] = Mul;
@@ -2343,7 +2343,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
}
if (Ok)
- LargeOps.push_back(getMulExpr(LargeMulOps));
+ LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1));
} else {
Ok = false;
break;
@@ -2417,7 +2417,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (MulOp.first != 0)
Ops.push_back(getMulExpr(
getConstant(MulOp.first),
- getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1)));
+ getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1));
if (Ops.empty())
return getZero(Ty);
if (Ops.size() == 1)
@@ -2445,11 +2446,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
- InnerMul = getMulExpr(MulOps);
+ InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
- const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV,
+ SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
@@ -2478,19 +2480,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
- InnerMul1 = getMulExpr(MulOps);
+ InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_begin()+OMulOp);
MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
- InnerMul2 = getMulExpr(MulOps);
+ InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
const SCEV *InnerMulSum =
getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
- const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum,
+ SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
@@ -2621,6 +2624,27 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
return S;
}
+const SCEV *
+ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
uint64_t k = i*j;
if (j > 1 && k / j != i) Overflow = true;
@@ -2673,7 +2697,8 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) {
/// Get a canonical multiply expression, or something simpler if possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags) {
+ SCEV::NoWrapFlags Flags,
+ unsigned Depth) {
assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty mul!");
@@ -2690,6 +2715,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
+ // Limit recursion calls depth.
+ if (Depth > MaxArithDepth)
+ return getOrCreateMulExpr(Ops, Flags);
+
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
@@ -2701,8 +2730,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// apply this transformation as well.
if (Add->getNumOperands() == 2)
if (containsConstantSomewhere(Add))
- return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
- getMulExpr(LHSC, Add->getOperand(1)));
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
+ SCEV::FlagAnyWrap, Depth + 1),
+ getMulExpr(LHSC, Add->getOperand(1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
@@ -2730,17 +2762,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
for (const SCEV *AddOp : Add->operands()) {
- const SCEV *Mul = getMulExpr(Ops[0], AddOp);
+ const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
+ Depth + 1);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
- return getAddExpr(NewOps);
+ return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
} else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *AddRecOp : AddRec->operands())
- Operands.push_back(getMulExpr(Ops[0], AddRecOp));
+ Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
+ Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
@@ -2762,18 +2796,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
if (Ops.size() > MulOpsInlineThreshold)
break;
- // If we have an mul, expand the mul operands onto the end of the operands
- // list.
+ // If we have an mul, expand the mul operands onto the end of the
+ // operands list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Mul->op_begin(), Mul->op_end());
DeletedMul = true;
}
- // If we deleted at least one mul, we added operands to the end of the list,
- // and they are not necessarily sorted. Recurse to resort and resimplify
- // any operands we just acquired.
+ // If we deleted at least one mul, we added operands to the end of the
+ // list, and they are not necessarily sorted. Recurse to resort and
+ // resimplify any operands we just acquired.
if (DeletedMul)
- return getMulExpr(Ops);
+ return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// If there are any add recurrences in the operands list, see if any other
@@ -2784,8 +2818,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
- // Scan all of the other operands to this mul and add them to the vector if
- // they are loop invariant w.r.t. the recurrence.
+ // Scan all of the other operands to this mul and add them to the vector
+ // if they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
@@ -2801,9 +2835,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
- const SCEV *Scale = getMulExpr(LIOps);
+ const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
- NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
+ SCEV::FlagAnyWrap, Depth + 1));
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
@@ -2822,12 +2857,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops[i] = NewRec;
break;
}
- return getMulExpr(Ops);
+ return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
- // Okay, if there weren't any loop invariants to be folded, check to see if
- // there are multiple AddRec's with the same loop induction variable being
- // multiplied together. If so, we can fold them.
+ // Okay, if there weren't any loop invariants to be folded, check to see
+ // if there are multiple AddRec's with the same loop induction variable
+ // being multiplied together. If so, we can fold them.
// {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
// = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
@@ -2869,7 +2904,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2,
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
AddRecOps.push_back(Term);
@@ -2887,7 +2924,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
}
}
if (OpsModified)
- return getMulExpr(Ops);
+ return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
@@ -2895,22 +2932,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, it looks like we really DO need an mul expr. Check to see if we
// already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scMulExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
- void *IP = nullptr;
- SCEVMulExpr *S =
- static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- if (!S) {
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
- UniqueSCEVs.InsertNode(S, IP);
- }
- S->setNoWrapFlags(Flags);
- return S;
+ return getOrCreateMulExpr(Ops, Flags);
}
/// Get a canonical unsigned division expression, or something simpler if
@@ -3713,7 +3735,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
- SCEV::NoWrapFlags Flags) {
+ SCEV::NoWrapFlags Flags,
+ unsigned Depth) {
// Fast path: X - X --> 0.
if (LHS == RHS)
return getZero(LHS->getType());
@@ -3747,7 +3770,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
// larger scope than intended.
auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
const SCEV *
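Taken together, the ScalarEvolution changes replace the add-only cutoff with a single MaxArithDepth budget threaded through getAddExpr, getMulExpr, and getMinusSCEV, so the mutual recursion between them is bounded as a whole; past the limit, the expression is built via getOrCreateAddExpr/getOrCreateMulExpr with no further folding. An illustrative, self-contained sketch of the pattern (names are not the SCEV API):

#include <cstdio>

static const unsigned MaxDepth = 32;

unsigned foldMul(unsigned N, unsigned Depth);

// Each folding step recurses with Depth + 1; once the shared budget is
// exhausted we stop simplifying and return the node as-is, bounding
// total work even for adversarial add/mul nests.
unsigned foldAdd(unsigned N, unsigned Depth) {
  if (Depth > MaxDepth)
    return N; // "getOrCreate" fallback: keep the node unsimplified
  if (N % 2 == 0)
    return foldMul(N / 2, Depth + 1);
  return N;
}

unsigned foldMul(unsigned N, unsigned Depth) {
  if (Depth > MaxDepth)
    return N;
  if (N % 3 == 0)
    return foldAdd(N / 3, Depth + 1);
  return N;
}

int main() { std::printf("%u\n", foldAdd(1u << 20, 0)); }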
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 488cb332a0b0..92328f6e5efd 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -103,6 +103,10 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
return TTIImpl->isSourceOfDivergence(V);
}
+bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
+ return TTIImpl->isAlwaysUniform(V);
+}
+
unsigned TargetTransformInfo::getFlatAddressSpace() const {
return TTIImpl->getFlatAddressSpace();
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c0181662fd9d..b065f427b06c 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -852,7 +852,8 @@ static void computeKnownBitsFromShiftOperator(
Optional<bool> ShifterOperandIsNonZero;
// Early exit if we can't constrain any well-defined shift amount.
- if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+ if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
+ !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
ShifterOperandIsNonZero =
isKnownNonZero(I->getOperand(1), Depth + 1, Q);
if (!*ShifterOperandIsNonZero)
@@ -3026,7 +3027,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
if (GV->getInitializer()->isNullValue()) {
Type *GVTy = GV->getValueType();
if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
- // A zeroinitializer for the array; There is no ConstantDataArray.
+ // A zeroinitializer for the array; there is no ConstantDataArray.
Array = nullptr;
} else {
const DataLayout &DL = GV->getParent()->getDataLayout();
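The PowerOf2Ceil change above matters only for non-power-of-two integer widths: there, BitWidth - 1 is not an all-ones mask, so some bits of the shift amount fell outside it. For i24, 24 - 1 = 23 = 0b10111 omits bit 3, while PowerOf2Ceil(24) - 1 = 31 = 0b11111 covers every bit a valid shift amount (0..23) can set. A tiny check of that arithmetic, with powerOf2Ceil as a local stand-in for llvm::PowerOf2Ceil:

#include <cassert>
#include <cstdint>

static uint32_t powerOf2Ceil(uint32_t X) { // stand-in for llvm::PowerOf2Ceil
  uint32_t P = 1;
  while (P < X)
    P <<= 1;
  return P;
}

int main() {
  assert(((24 - 1) & 8) == 0);               // old mask misses bit 3
  assert(((powerOf2Ceil(24) - 1) & 8) == 8); // new mask sees it
}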
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 95987fac74e1..0629c2d326ae 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -733,13 +733,13 @@ private:
std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record,
bool IsOldProfileFormat,
bool HasProfile);
- Error parseEntireSummary();
+ Error parseEntireSummary(unsigned ID);
Error parseModuleStringTable();
std::pair<ValueInfo, GlobalValue::GUID>
getValueInfoFromValueId(unsigned ValueId);
- ModulePathStringTableTy::iterator addThisModulePath();
+ ModuleSummaryIndex::ModuleInfo *addThisModule();
};
} // end anonymous namespace
@@ -2608,6 +2608,16 @@ Error BitcodeReader::materializeMetadata() {
if (Error Err = MDLoader->parseModuleMetadata())
return Err;
}
+
+ // Upgrade "Linker Options" module flag to "llvm.linker.options" module-level
+ // metadata.
+ if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) {
+ NamedMDNode *LinkerOpts =
+ TheModule->getOrInsertNamedMetadata("llvm.linker.options");
+ for (const MDOperand &MDOptions : cast<MDNode>(Val)->operands())
+ LinkerOpts->addOperand(cast<MDNode>(MDOptions));
+ }
+
DeferredMetadataInfo.clear();
return Error::success();
}
@@ -4691,9 +4701,9 @@ ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
: BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
ModulePath(ModulePath), ModuleId(ModuleId) {}
-ModulePathStringTableTy::iterator
-ModuleSummaryIndexBitcodeReader::addThisModulePath() {
- return TheIndex.addModulePath(ModulePath, ModuleId);
+ModuleSummaryIndex::ModuleInfo *
+ModuleSummaryIndexBitcodeReader::addThisModule() {
+ return TheIndex.addModule(ModulePath, ModuleId);
}
std::pair<ValueInfo, GlobalValue::GUID>
@@ -4844,6 +4854,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
return error("Invalid record");
break;
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
+ case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
assert(!SeenValueSymbolTable &&
"Already read VST when parsing summary block?");
// We might not have a VST if there were no values in the
@@ -4856,7 +4867,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
SeenValueSymbolTable = true;
}
SeenGlobalValSummary = true;
- if (Error Err = parseEntireSummary())
+ if (Error Err = parseEntireSummary(Entry.ID))
return Err;
break;
case bitc::MODULE_STRTAB_BLOCK_ID:
@@ -4889,7 +4900,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
case bitc::MODULE_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
- auto &Hash = addThisModulePath()->second.second;
+ auto &Hash = addThisModule()->second.second;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
@@ -4964,8 +4975,8 @@ std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallLi
// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
-Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
- if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID))
+Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
+ if (Stream.EnterSubBlock(ID))
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
@@ -5070,7 +5081,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
PendingTypeTestAssumeConstVCalls.clear();
PendingTypeCheckedLoadConstVCalls.clear();
auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID);
- FS->setModulePath(addThisModulePath()->first());
+ FS->setModulePath(addThisModule()->first());
FS->setOriginalName(VIAndOriginalGUID.second);
TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS));
break;
@@ -5090,7 +5101,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
// string table section in the per-module index, we create a single
// module path string table entry with an empty (0) ID to take
// ownership.
- AS->setModulePath(addThisModulePath()->first());
+ AS->setModulePath(addThisModule()->first());
GlobalValue::GUID AliaseeGUID =
getValueInfoFromValueId(AliaseeID).first.getGUID();
@@ -5113,7 +5124,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
std::vector<ValueInfo> Refs =
makeRefList(ArrayRef<uint64_t>(Record).slice(2));
auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
- FS->setModulePath(addThisModulePath()->first());
+ FS->setModulePath(addThisModule()->first());
auto GUID = getValueInfoFromValueId(ValueID);
FS->setOriginalName(GUID.second);
TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
@@ -5241,6 +5252,20 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
{{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}});
break;
}
+ case bitc::FS_CFI_FUNCTION_DEFS: {
+ std::set<std::string> &CfiFunctionDefs = TheIndex.cfiFunctionDefs();
+ for (unsigned I = 0; I != Record.size(); I += 2)
+ CfiFunctionDefs.insert(
+ {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])});
+ break;
+ }
+ case bitc::FS_CFI_FUNCTION_DECLS: {
+ std::set<std::string> &CfiFunctionDecls = TheIndex.cfiFunctionDecls();
+ for (unsigned I = 0; I != Record.size(); I += 2)
+ CfiFunctionDecls.insert(
+ {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])});
+ break;
+ }
}
}
llvm_unreachable("Exit infinite loop");
@@ -5255,7 +5280,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
SmallVector<uint64_t, 64> Record;
SmallString<128> ModulePath;
- ModulePathStringTableTy::iterator LastSeenModulePath;
+ ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr;
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -5282,8 +5307,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
if (convertToString(Record, 1, ModulePath))
return error("Invalid record");
- LastSeenModulePath = TheIndex.addModulePath(ModulePath, ModuleId);
- ModuleIdMap[ModuleId] = LastSeenModulePath->first();
+ LastSeenModule = TheIndex.addModule(ModulePath, ModuleId);
+ ModuleIdMap[ModuleId] = LastSeenModule->first();
ModulePath.clear();
break;
@@ -5292,15 +5317,15 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
case bitc::MST_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
- if (LastSeenModulePath == TheIndex.modulePaths().end())
+ if (!LastSeenModule)
return error("Invalid hash that does not follow a module path");
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
- LastSeenModulePath->second.second[Pos++] = Val;
+ LastSeenModule->second.second[Pos++] = Val;
}
- // Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
- LastSeenModulePath = TheIndex.modulePaths().end();
+ // Reset LastSeenModule to avoid overriding the hash unexpectedly.
+ LastSeenModule = nullptr;
break;
}
}
@@ -5507,13 +5532,16 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
}
// Parse the specified bitcode buffer and merge the index into CombinedIndex.
+// We don't use ModuleIdentifier here because the client may need to control the
+// module path used in the combined summary (e.g. when reading summaries for
+// regular LTO modules).
Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
- unsigned ModuleId) {
+ StringRef ModulePath, uint64_t ModuleId) {
BitstreamCursor Stream(Buffer);
Stream.JumpToBit(ModuleBit);
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
- ModuleIdentifier, ModuleId);
+ ModulePath, ModuleId);
return R.parseModule();
}
@@ -5533,7 +5561,7 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
}
// Check if the given bitcode buffer contains a global value summary block.
-Expected<bool> BitcodeModule::hasSummary() {
+Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
BitstreamCursor Stream(Buffer);
Stream.JumpToBit(ModuleBit);
@@ -5547,11 +5575,14 @@ Expected<bool> BitcodeModule::hasSummary() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return false;
+ return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false};
case BitstreamEntry::SubBlock:
if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID)
- return true;
+ return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true};
+
+ if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID)
+ return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true};
// Ignore other sub-blocks.
if (Stream.SkipBlock())
@@ -5638,12 +5669,12 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) {
Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer,
ModuleSummaryIndex &CombinedIndex,
- unsigned ModuleId) {
+ uint64_t ModuleId) {
Expected<BitcodeModule> BM = getSingleModule(Buffer);
if (!BM)
return BM.takeError();
- return BM->readSummary(CombinedIndex, ModuleId);
+ return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId);
}
Expected<std::unique_ptr<ModuleSummaryIndex>>
@@ -5655,12 +5686,12 @@ llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) {
return BM->getSummary();
}
-Expected<bool> llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) {
+Expected<BitcodeLTOInfo> llvm::getBitcodeLTOInfo(MemoryBufferRef Buffer) {
Expected<BitcodeModule> BM = getSingleModule(Buffer);
if (!BM)
return BM.takeError();
- return BM->hasSummary();
+ return BM->getLTOInfo();
}
Expected<std::unique_ptr<ModuleSummaryIndex>>
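With getLTOInfo, callers can tell a ThinLTO summary (GLOBALVAL_SUMMARY_BLOCK_ID) apart from a regular-LTO module that merely carries a summary (FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID), two cases the old hasSummary boolean conflated. A hypothetical caller sketch against the API shown above (requires the LLVM headers; actions are placeholders):

#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"

llvm::Error classify(llvm::MemoryBufferRef Buffer) {
  llvm::Expected<llvm::BitcodeLTOInfo> Info = llvm::getBitcodeLTOInfo(Buffer);
  if (!Info)
    return Info.takeError();
  if (Info->IsThinLTO)
    ; // ThinLTO module: schedule a thin backend
  else if (Info->HasSummary)
    ; // regular LTO module whose summary is still usable for analysis
  return llvm::Error::success();
}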
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index ee2fe2a0cc18..b1504a8034e0 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -407,6 +407,11 @@ void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) {
} // anonymous namespace
+static Error error(const Twine &Message) {
+ return make_error<StringError>(
+ Message, make_error_code(BitcodeError::CorruptedBitcode));
+}
+
class MetadataLoader::MetadataLoaderImpl {
BitcodeReaderMetadataList MetadataList;
BitcodeReaderValueList &ValueList;
@@ -533,6 +538,88 @@ class MetadataLoader::MetadataLoaderImpl {
}
}
+ /// Upgrade the expression from previous versions.
+ Error upgradeDIExpression(uint64_t FromVersion,
+ MutableArrayRef<uint64_t> &Expr,
+ SmallVectorImpl<uint64_t> &Buffer) {
+ auto N = Expr.size();
+ switch (FromVersion) {
+ default:
+ return error("Invalid record");
+ case 0:
+ if (N >= 3 && Expr[N - 3] == dwarf::DW_OP_bit_piece)
+ Expr[N - 3] = dwarf::DW_OP_LLVM_fragment;
+ LLVM_FALLTHROUGH;
+ case 1:
+ // Move DW_OP_deref to the end.
+ if (N && Expr[0] == dwarf::DW_OP_deref) {
+ auto End = Expr.end();
+ if (Expr.size() >= 3 &&
+ *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment)
+ End = std::prev(End, 3);
+ std::move(std::next(Expr.begin()), End, Expr.begin());
+ *std::prev(End) = dwarf::DW_OP_deref;
+ }
+ NeedDeclareExpressionUpgrade = true;
+ LLVM_FALLTHROUGH;
+ case 2: {
+ // Change DW_OP_plus to DW_OP_plus_uconst.
+ // Change DW_OP_minus to DW_OP_constu, DW_OP_minus.
+ auto SubExpr = ArrayRef<uint64_t>(Expr);
+ while (!SubExpr.empty()) {
+ // Skip past other operators with their operands
+ // for this version of the IR, obtained from
+ // historic DIExpression::ExprOperand::getSize().
+ size_t HistoricSize;
+ switch (SubExpr.front()) {
+ default:
+ HistoricSize = 1;
+ break;
+ case dwarf::DW_OP_constu:
+ case dwarf::DW_OP_minus:
+ case dwarf::DW_OP_plus:
+ HistoricSize = 2;
+ break;
+ case dwarf::DW_OP_LLVM_fragment:
+ HistoricSize = 3;
+ break;
+ }
+
+ // If the expression is malformed, make sure we don't
+ // copy more elements than we should.
+ HistoricSize = std::min(SubExpr.size(), HistoricSize);
+ ArrayRef<uint64_t> Args = SubExpr.slice(1, HistoricSize-1);
+
+ switch (SubExpr.front()) {
+ case dwarf::DW_OP_plus:
+ Buffer.push_back(dwarf::DW_OP_plus_uconst);
+ Buffer.append(Args.begin(), Args.end());
+ break;
+ case dwarf::DW_OP_minus:
+ Buffer.push_back(dwarf::DW_OP_constu);
+ Buffer.append(Args.begin(), Args.end());
+ Buffer.push_back(dwarf::DW_OP_minus);
+ break;
+ default:
+ Buffer.push_back(*SubExpr.begin());
+ Buffer.append(Args.begin(), Args.end());
+ break;
+ }
+
+ // Continue with remaining elements.
+ SubExpr = SubExpr.slice(HistoricSize);
+ }
+ Expr = MutableArrayRef<uint64_t>(Buffer);
+ LLVM_FALLTHROUGH;
+ }
+ case 3:
+ // Up-to-date!
+ break;
+ }
+
+ return Error::success();
+ }
+
void upgradeDebugInfo() {
upgradeCUSubprograms();
upgradeCUVariables();
@@ -590,11 +677,6 @@ public:
void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); }
};
-static Error error(const Twine &Message) {
- return make_error<StringError>(
- Message, make_error_code(BitcodeError::CorruptedBitcode));
-}
-
Expected<bool>
MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
IndexCursor = Stream;
@@ -1551,34 +1633,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
IsDistinct = Record[0] & 1;
uint64_t Version = Record[0] >> 1;
auto Elts = MutableArrayRef<uint64_t>(Record).slice(1);
- unsigned N = Elts.size();
- // Perform various upgrades.
- switch (Version) {
- case 0:
- if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
- Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
- LLVM_FALLTHROUGH;
- case 1:
- // Move DW_OP_deref to the end.
- if (N && Elts[0] == dwarf::DW_OP_deref) {
- auto End = Elts.end();
- if (Elts.size() >= 3 && *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment)
- End = std::prev(End, 3);
- std::move(std::next(Elts.begin()), End, Elts.begin());
- *std::prev(End) = dwarf::DW_OP_deref;
- }
- NeedDeclareExpressionUpgrade = true;
- LLVM_FALLTHROUGH;
- case 2:
- // Up-to-date!
- break;
- default:
- return error("Invalid record");
- }
+
+ SmallVector<uint64_t, 6> Buffer;
+ if (Error Err = upgradeDIExpression(Version, Elts, Buffer))
+ return Err;
MetadataList.assignValue(
- GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
- NextMetadataNo);
+ GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo);
NextMetadataNo++;
break;
}
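The version bump (the writer below now emits Version = 3 << 1) gives DW_OP_plus and DW_OP_minus their standard DWARF stack semantics; the old one-argument "add a constant" form becomes DW_OP_plus_uconst. Concretely, the case-2 upgrade rewrites [DW_OP_plus, 8] into [DW_OP_plus_uconst, 8] and [DW_OP_minus, 8] into [DW_OP_constu, 8, DW_OP_minus]. An illustrative re-implementation over plain integers, not the LLVM API, with DW_OP_LLVM_fragment handling omitted:

#include <cstdint>
#include <vector>

enum : uint64_t {
  DW_OP_constu = 0x10,      // standard DWARF opcode values
  DW_OP_minus = 0x1c,
  DW_OP_plus = 0x22,
  DW_OP_plus_uconst = 0x23,
};

// Rewrite a version<=2 expression into version-3 form. DW_OP_constu is
// copied through untouched since it already carried an operand in the
// old encoding and must not have that operand misread as an opcode.
std::vector<uint64_t> upgradeFromV2(const std::vector<uint64_t> &In) {
  std::vector<uint64_t> Out;
  for (size_t I = 0; I < In.size();) {
    bool HasArg = I + 1 < In.size();
    if (In[I] == DW_OP_plus && HasArg) {
      Out.insert(Out.end(), {DW_OP_plus_uconst, In[I + 1]});
      I += 2;
    } else if (In[I] == DW_OP_minus && HasArg) {
      Out.insert(Out.end(), {DW_OP_constu, In[I + 1], DW_OP_minus});
      I += 2;
    } else if (In[I] == DW_OP_constu && HasArg) {
      Out.insert(Out.end(), {In[I], In[I + 1]});
      I += 2;
    } else {
      Out.push_back(In[I++]); // ops with no operand in the old encoding
    }
  }
  return Out; // e.g. {DW_OP_minus, 8} -> {DW_OP_constu, 8, DW_OP_minus}
}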
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index d5879fec95cb..feeba31908ae 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -77,10 +77,13 @@ protected:
/// The stream created and owned by the client.
BitstreamWriter &Stream;
+ StringTableBuilder &StrtabBuilder;
+
public:
/// Constructs a BitcodeWriterBase object that writes to the provided
/// \p Stream.
- BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
+ BitcodeWriterBase(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder)
+ : Stream(Stream), StrtabBuilder(StrtabBuilder) {}
protected:
void writeBitcodeHeader();
@@ -97,8 +100,6 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
- StringTableBuilder &StrtabBuilder;
-
/// The Module to write to bitcode.
const Module &M;
@@ -142,8 +143,8 @@ public:
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash,
ModuleHash *ModHash = nullptr)
- : BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder),
- M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index),
+ : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M),
+ VE(*M, ShouldPreserveUseListOrder), Index(Index),
GenerateHash(GenerateHash), ModHash(ModHash),
BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
@@ -331,10 +332,11 @@ public:
/// Constructs a IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Buffer. When writing a subset of the index
/// for a distributed backend, provide a \p ModuleToSummariesForIndex map.
- IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index,
+ IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder,
+ const ModuleSummaryIndex &Index,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr)
- : BitcodeWriterBase(Stream), Index(Index),
+ : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index),
ModuleToSummariesForIndex(ModuleToSummariesForIndex) {
// Assign unique value ids to all summaries to be written, for use
// in writing out the call graph edges. Save the mapping from GUID
@@ -1663,7 +1665,7 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
Record.reserve(N->getElements().size() + 1);
- const uint64_t Version = 2 << 1;
+ const uint64_t Version = 3 << 1;
Record.push_back((uint64_t)N->isDistinct() | Version);
Record.append(N->elements_begin(), N->elements_end());
@@ -3595,6 +3597,24 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
MaybeEmitOriginalName(*AS);
}
+ if (!Index.cfiFunctionDefs().empty()) {
+ for (auto &S : Index.cfiFunctionDefs()) {
+ NameVals.push_back(StrtabBuilder.add(S));
+ NameVals.push_back(S.size());
+ }
+ Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals);
+ NameVals.clear();
+ }
+
+ if (!Index.cfiFunctionDecls().empty()) {
+ for (auto &S : Index.cfiFunctionDecls()) {
+ NameVals.push_back(StrtabBuilder.add(S));
+ NameVals.push_back(S.size());
+ }
+ Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals);
+ NameVals.clear();
+ }
+
Stream.ExitBlock();
}
@@ -3829,6 +3849,14 @@ void BitcodeWriter::writeModule(const Module *M,
ModuleWriter.write();
}
+void BitcodeWriter::writeIndex(
+ const ModuleSummaryIndex *Index,
+ const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
+ IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index,
+ ModuleToSummariesForIndex);
+ IndexWriter.write();
+}
+
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
@@ -3880,11 +3908,9 @@ void llvm::WriteIndexToFile(
SmallVector<char, 0> Buffer;
Buffer.reserve(256 * 1024);
- BitstreamWriter Stream(Buffer);
- writeBitcodeHeader(Stream);
-
- IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex);
- IndexWriter.write();
+ BitcodeWriter Writer(Buffer);
+ Writer.writeIndex(&Index, ModuleToSummariesForIndex);
+ Writer.writeStrtab();
Out.write((char *)&Buffer.front(), Buffer.size());
}
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index f7c09be15fb7..946067e6358f 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -24,3 +24,4 @@ add_subdirectory(Fuzzer)
add_subdirectory(Passes)
add_subdirectory(ToolDrivers)
add_subdirectory(XRay)
+add_subdirectory(Testing)
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 407d5623d670..ad348d723bae 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1286,11 +1286,7 @@ bool AsmPrinter::doFinalization(Module &M) {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- // Emit module flags.
- SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
- M.getModuleFlagsMetadata(ModuleFlags);
- if (!ModuleFlags.empty())
- TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM);
+ TLOF.emitModuleMetadata(*OutStreamer, M, TM);
if (TM.getTargetTriple().isOSBinFormatELF()) {
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 04073b3aed68..dc39d1e6cb52 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -552,7 +552,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
SmallVector<uint64_t, 8> Ops;
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Offset);
Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
@@ -821,7 +821,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
SmallVector<uint64_t, 8> Ops;
if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Location.getOffset());
}
DIExpressionCursor Cursor(Ops);
@@ -850,7 +850,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
SmallVector<uint64_t, 8> Ops;
if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Location.getOffset());
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index e3fd21a1fd70..75eb355bfb54 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1511,7 +1511,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Location.getOffset());
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ebfba4cfc275..5dfe06c64ec2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -134,6 +134,13 @@ public:
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
+ if (FrameIndexExprs.size()) {
+ auto *Expr = FrameIndexExprs.back().Expr;
+ // Get rid of duplicate non-fragment entries. More than one non-fragment
+ // dbg.declare makes no sense so ignore all but the first.
+ if (!Expr || !Expr->isFragment())
+ return;
+ }
FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end());
assert(all_of(FrameIndexExprs,
[](FrameIndexExpr &FIE) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d96479f43433..fe38ee805682 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -248,15 +248,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
assert(Reg.Size == 0 && "subregister has same size as superregister");
// Pattern-match combinations for which more efficient representations exist.
- // [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset].
- // [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset].
- // If Reg is a subregister we need to mask it out before subtracting.
- if (Op && ((Op->getOp() == dwarf::DW_OP_plus) ||
- (Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
- int Offset = Op->getArg(0);
- SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
+ // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset].
+ if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) {
+ SignedOffset = Op->getArg(0);
ExprCursor.take();
}
+
+ // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]
+ // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset]
+ // If Reg is a subregister we need to mask it out before subtracting.
+ if (Op && Op->getOp() == dwarf::DW_OP_constu) {
+ auto N = ExprCursor.peekNext();
+ if (N && (N->getOp() == dwarf::DW_OP_plus ||
+ (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
+ int Offset = Op->getArg(0);
+ SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset;
+ ExprCursor.consume(2);
+ }
+ }
+
if (FBReg)
addFBReg(SignedOffset);
else
@@ -320,17 +330,14 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
LocationKind = Unknown;
return;
}
- case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_plus_uconst:
assert(LocationKind != Register);
emitOp(dwarf::DW_OP_plus_uconst);
emitUnsigned(Op->getArg(0));
break;
+ case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
- assert(LocationKind != Register);
- // There is no DW_OP_minus_uconst.
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Op->getArg(0));
- emitOp(dwarf::DW_OP_minus);
+ emitOp(Op->getOp());
break;
case dwarf::DW_OP_deref: {
assert(LocationKind != Register);
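Worked example of the two patterns now folded into a base-register location: on register R, the expression [DW_OP_plus_uconst, 16] becomes DW_OP_breg<R> +16, and [DW_OP_constu, 16, DW_OP_minus] becomes DW_OP_breg<R> -16. The minus form is only folded when R is not a subregister, since a subregister must be masked out before subtracting.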
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index de8613200067..728f8ad9225b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -42,6 +42,9 @@ public:
DIExpressionCursor(ArrayRef<uint64_t> Expr)
: Start(Expr.begin()), End(Expr.end()) {}
+ DIExpressionCursor(const DIExpressionCursor &C)
+ : Start(C.Start), End(C.End) {}
+
/// Consume one operation.
Optional<DIExpression::ExprOperand> take() {
if (Start == End)
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 7f7d3e650e02..708f5f7536ff 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -475,7 +475,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
SmallVector<uint64_t, 9> Ops;
if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Location.getOffset());
}
// If we started with a pointer to the __Block_byref... struct, then
@@ -487,7 +487,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(forwardingFieldOffset);
}
@@ -499,7 +499,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(varFieldOffset);
}
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c2037cb7f1ae..37e176099ea7 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -134,7 +134,7 @@ static cl::opt<bool> DisablePreheaderProtect(
cl::desc("Disable protection against removing loop preheaders"));
static cl::opt<bool> ProfileGuidedSectionPrefix(
- "profile-guided-section-prefix", cl::Hidden, cl::init(true),
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
cl::desc("Use profile info to add section prefix for hot/cold functions"));
static cl::opt<unsigned> FreqRatioToSkipMerge(
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ef5818dabe23..1d0d3dffa4c5 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -82,6 +82,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_UDIV:
assert(Size == 32 && "Unsupported size");
return RTLIB::UDIV_I32;
+ case TargetOpcode::G_SREM:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::SREM_I32;
+ case TargetOpcode::G_UREM:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::UREM_I32;
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@@ -93,43 +99,57 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
llvm_unreachable("Unknown libcall function");
}
-static LegalizerHelper::LegalizeResult
-simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
- Type *OpType) {
+LegalizerHelper::LegalizeResult llvm::replaceWithLibcall(
+ MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
+ const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) {
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
const char *Name = TLI.getLibcallName(Libcall);
MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
- CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
- MachineOperand::CreateES(Name),
- {MI.getOperand(0).getReg(), OpType},
- {{MI.getOperand(1).getReg(), OpType},
- {MI.getOperand(2).getReg(), OpType}});
+ MIRBuilder.setInstr(MI);
+ if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
+ MachineOperand::CreateES(Name), Result, Args))
+ return LegalizerHelper::UnableToLegalize;
+
+ // We're about to remove MI, so move the insert point after it.
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(),
+ std::next(MIRBuilder.getInsertPt()));
+
MI.eraseFromParent();
return LegalizerHelper::Legalized;
}
+static LegalizerHelper::LegalizeResult
+simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
+ Type *OpType) {
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ return replaceWithLibcall(MI, MIRBuilder, Libcall,
+ {MI.getOperand(0).getReg(), OpType},
+ {{MI.getOperand(1).getReg(), OpType},
+ {MI.getOperand(2).getReg(), OpType}});
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- unsigned Size = Ty.getSizeInBits();
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
- MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV: {
- Type *Ty = Type::getInt32Ty(Ctx);
- return simpleLibcall(MI, MIRBuilder, Size, Ty);
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
+ Type *HLTy = Type::getInt32Ty(Ctx);
+ return simpleLibcall(MI, MIRBuilder, Size, HLTy);
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
- Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
- return simpleLibcall(MI, MIRBuilder, Size, Ty);
+ Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
+ return simpleLibcall(MI, MIRBuilder, Size, HLTy);
}
}
}
@@ -237,17 +257,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts =
MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
- LLT NarrowPtrTy = LLT::pointer(
- MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize);
+ LLT OffsetTy = LLT::scalar(
+ MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
SmallVector<unsigned, 2> DstRegs;
for (int i = 0; i < NumParts; ++i) {
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy);
- unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ unsigned SrcReg = 0;
+ unsigned Adjustment = i * NarrowSize / 8;
+
+ MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
+ Adjustment);
- MIRBuilder.buildConstant(Offset, i * NarrowSize / 8);
- MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset);
// TODO: This is conservatively correct, but we probably want to split the
// memory operands in the future.
MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin());
@@ -263,17 +284,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts =
MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
- LLT NarrowPtrTy = LLT::pointer(
- MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize);
+ LLT OffsetTy = LLT::scalar(
+ MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
SmallVector<unsigned, 2> SrcRegs;
extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy);
- unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64));
- MIRBuilder.buildConstant(Offset, i * NarrowSize / 8);
- MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset);
+ unsigned DstReg = 0;
+ unsigned Adjustment = i * NarrowSize / 8;
+
+ MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
+ Adjustment);
+
// TODO: This is conservatively correct, but we probably want to split the
// memory operands in the future.
MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin());
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 54ef7e5c5a1b..79d312fb52ca 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -191,6 +191,24 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
.addUse(Op1);
}
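+// Illustrative usage (a hypothetical caller; `BasePtr` is an assumed vreg):
+//
+//   unsigned Addr = 0;
+//   MIRBuilder.materializeGEP(Addr, BasePtr, LLT::scalar(64), 16);
+//
+// When the offset is 0, no instructions are emitted and Addr simply aliases
+// BasePtr, which is why Res must be passed in as 0.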
+Optional<MachineInstrBuilder>
+MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+ const LLT &ValueTy, uint64_t Value) {
+ assert(Res == 0 && "Res is a result argument");
+ assert(ValueTy.isScalar() && "invalid offset type");
+
+ if (Value == 0) {
+ Res = Op0;
+ return None;
+ }
+
+ Res = MRI->createGenericVirtualRegister(MRI->getType(Op0));
+ unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy);
+
+ buildConstant(TmpReg, Value);
+ return buildGEP(Res, Op0, TmpReg);
+}
+
MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
uint32_t NumBits) {
assert(MRI->getType(Res).isPointer() &&
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index fc52b0da0d61..2d4b95974cc6 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -594,8 +594,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors(
// Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
// A->C is chosen as a fall-through, D won't be selected as a successor of C
// due to CFG constraint (the probability of C->D is not greater than
- // HotProb to break top-order). If we exclude E that is not in BlockFilter
- // when calculating the probability of C->D, D will be selected and we
+ // HotProb to break topo-order). If we exclude E that is not in BlockFilter
+ // when calculating the probability of C->D, D will be selected and we
// will get A C D B as the layout of this loop.
auto AdjustedSumProb = BranchProbability::getOne();
for (MachineBasicBlock *Succ : BB->successors()) {
@@ -1156,7 +1156,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
continue;
// Now we have an interesting triangle. Insert it if it's not part of an
- // existing chain
+ // existing chain.
// Note: This cannot be replaced with a call insert() or emplace() because
// the find key is BB, but the insert/emplace key is PDom.
auto Found = TriangleChainMap.find(&BB);
@@ -1298,9 +1298,9 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// | | | |
// ---BB | | BB
// | | | |
- // | pred-- | Succ--
+ // | Pred-- | Succ--
// | | | |
- // ---succ ---pred--
+ // ---Succ ---Pred--
//
// cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
// = freq(S->Pred) + freq(S->BB)
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 52d5819f8dbc..c7113f1fdc47 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -895,8 +895,11 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg))
- return false;
+ // However, if the physreg is known to always be caller saved/restored,
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent())))
+ return false;
// Otherwise it's safe to move.
continue;
} else if (!MO.isDead()) {
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a0967f574006..2d4422d94a17 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2217,7 +2217,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// Iff the flag result is dead:
// (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
- if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) &&
+ if ((N0.getOpcode() == ISD::ADD ||
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
@@ -12460,10 +12461,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
- SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlignment());
+
+ // Make sure we use a trunc store if that's necessary for legality.
+ SDValue NewStore;
+ if (TLI.isTypeLegal(StoredVal.getValueType())) {
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlignment());
+ } else { // Must be realized as a trunc store
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
+ ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
+ SDValue ExtendedStoreVal =
+ DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
+ LegalizedStoredValueTy);
+ NewStore = DAG.getTruncStore(
+ NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
// Replace all merged stores with the new store.
for (unsigned i = 0; i < NumStores; ++i)
@@ -12731,8 +12749,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
IsFast) {
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
- } else if (!LegalTypes &&
- TLI.getTypeAction(Context, StoreTy) ==
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
@@ -12947,8 +12964,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
- if (!LegalTypes &&
- TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
@@ -12958,8 +12974,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
IsFastLd)
LastLegalIntegerType = i + 1;
}
@@ -13189,10 +13205,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Chain = ST->getChain();
}
- // Try transforming N to an indexed store.
- if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
- return SDValue(N, 0);
-
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
Value.getValueType().isInteger()) {
@@ -13287,6 +13299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
//
// Make sure to do this only after attempting to merge stores in order to
@@ -14692,21 +14708,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
VT.getStoreSize());
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
-
- // The new load must have the same position as the old load in terms of memory
- // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's
- // output chain to use that TokenFactor.
- // TODO: This code is based on a similar sequence in x86 lowering. It should
- // be moved to a helper function, so it can be shared and reused.
- if (Ld->hasAnyUseOfValue(1)) {
- SDValue OldChain = SDValue(Ld, 1);
- SDValue NewChain = SDValue(NewLd.getNode(), 1);
- SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- OldChain, NewChain);
- DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
- DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
- }
-
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 606b8952f3c1..b736037d71dd 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -523,3 +523,29 @@ void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
}
+
+std::pair<unsigned, bool>
+FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
+ auto It = SwiftErrorVRegDefUses.find(Key);
+ if (It == SwiftErrorVRegDefUses.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+ SwiftErrorVRegDefUses[Key] = VReg;
+ return std::make_pair(VReg, true);
+ }
+ return std::make_pair(It->second, false);
+}
+
+std::pair<unsigned, bool>
+FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+ auto It = SwiftErrorVRegDefUses.find(Key);
+ if (It == SwiftErrorVRegDefUses.end()) {
+ unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val);
+ SwiftErrorVRegDefUses[Key] = VReg;
+ return std::make_pair(VReg, true);
+ }
+ return std::make_pair(It->second, false);
+}
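+
+// Note that the PointerIntPair key encodes (instruction, is-def): true for
+// defs created by getOrCreateSwiftErrorVRegDefAt, false for uses created by
+// getOrCreateSwiftErrorVRegUseAt, so a single call instruction can carry one
+// vreg for its use and a distinct vreg for its def.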
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e54eaa3b81be..15e87b7af18d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2192,19 +2192,6 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
return TLI.getLibcallName(LC) != nullptr;
}
-/// Return true if sincos libcall is available and can be used to combine sin
-/// and cos.
-static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
- const TargetMachine &TM) {
- if (!isSinCosLibcallAvailable(Node, TLI))
- return false;
- // GNU sin/cos functions set errno while sincos does not. Therefore
- // combining sin and cos is only safe if unsafe-fpmath is enabled.
- if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath)
- return false;
- return true;
-}
-
/// Only issue sincos libcall if both sin and cos are needed.
static bool useSinCos(SDNode *Node) {
unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
@@ -3247,7 +3234,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
- canCombineSinCosLibcall(Node, TLI, TM))
+ isSinCosLibcallAvailable(Node, TLI))
&& useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0d5e07ded25c..a3ba52a148ee 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1828,10 +1828,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
@@ -1864,6 +1865,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
+
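+ // With ZeroOrOneBooleanContent the setcc result is already 0 or 1, so the
+ // carry can be zero-extended and added directly, avoiding the select below.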
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
+ SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ return;
+ }
+
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
@@ -1878,9 +1886,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
- DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
+
+ SDValue Borrow;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index dff8bd2ad37d..7abdc76cb004 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7244,6 +7244,24 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
AddDbgValue(I, ToNode, false);
}
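+/// If anything still uses OldLoad's output chain, tie that chain together
+/// with NewMemOp's chain via a TokenFactor so the new operation keeps the
+/// old load's position in the memory dependency graph.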
+void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+ if (!OldLoad->hasAnyUseOfValue(1))
+ return;
+
+ // The new memory operation must have the same position as the old load in
+ // terms of memory dependency. Create a TokenFactor for the old load and new
+ // memory operation and update uses of the old load's output chain to use that
+ // TokenFactor.
+ SDValue OldChain = SDValue(OldLoad, 1);
+ SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
+ SDValue TokenFactor =
+ getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
+ ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
+ UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d34ac40b9496..f9f431db55be 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1496,9 +1496,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
true /*isfixed*/, 1 /*origidx*/,
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
- OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(
- FuncInfo.MBB, FuncInfo.SwiftErrorArg),
- EVT(TLI.getPointerTy(DL))));
+ OutVals.push_back(
+ DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
+ &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+ EVT(TLI.getPointerTy(DL))));
}
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
@@ -3581,8 +3582,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- assert(TLI.supportSwiftError() &&
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitStoreToSwiftError when backend supports swifterror");
SmallVector<EVT, 4> ValueVTs;
@@ -3595,15 +3595,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- auto &DL = DAG.getDataLayout();
- const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
- unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned VReg; bool CreatedVReg;
+ std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+ if (CreatedVReg)
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3633,7 +3633,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
- FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]);
+ FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
+ ValueVTs[0]);
setValue(&I, L);
}
@@ -4942,11 +4943,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
updateDAGForMaybeTailCall(MM);
return nullptr;
}
- case Intrinsic::memcpy_element_atomic: {
- SDValue Dst = getValue(I.getArgOperand(0));
- SDValue Src = getValue(I.getArgOperand(1));
- SDValue NumElements = getValue(I.getArgOperand(2));
- SDValue ElementSize = getValue(I.getArgOperand(3));
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const ElementUnorderedAtomicMemCpyInst &MI =
+ cast<ElementUnorderedAtomicMemCpyInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
// Emit a library call.
TargetLowering::ArgListTy Args;
@@ -4958,18 +4960,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Entry.Node = Src;
Args.push_back(Entry);
- Entry.Ty = I.getArgOperand(2)->getType();
- Entry.Node = NumElements;
- Args.push_back(Entry);
-
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.Node = ElementSize;
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
Args.push_back(Entry);
- uint64_t ElementSizeConstant =
- cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
RTLIB::Libcall LibraryCall =
- RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant);
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported element size");
@@ -6030,9 +6027,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node =
- DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V),
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node = DAG.getRegister(FuncInfo
+ .getOrCreateSwiftErrorVRegUseAt(
+ CS.getInstruction(), FuncInfo.MBB, V)
+ .first,
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -6073,11 +6072,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
- unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned VReg; bool CreatedVReg;
+ std::tie(VReg, CreatedVReg) =
+ FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
// We update the virtual register for the actual swifterror argument.
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
+ if (CreatedVReg)
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
DAG.setRoot(CopyNode);
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b67f11f85b70..dcccd17bb98e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1055,6 +1055,7 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
FuncInfo->SwiftErrorVals.clear();
FuncInfo->SwiftErrorVRegDefMap.clear();
FuncInfo->SwiftErrorVRegUpwardsUse.clear();
+ FuncInfo->SwiftErrorVRegDefUses.clear();
FuncInfo->SwiftErrorArg = nullptr;
// Check if function has a swifterror argument.
@@ -1278,6 +1279,80 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
}
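+// Walk the instructions in [Begin, End) and pre-assign virtual registers to
+// every swifterror def and use found there (call sites, loads, stores and
+// returns), recording them in FuncInfo before instruction selection runs.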
+void preassignSwiftErrorRegs(const TargetLowering *TLI,
+ FunctionLoweringInfo *FuncInfo,
+ BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
+ if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
+ return;
+
+ // Iterate over the instructions and assign vregs to swifterror defs and uses.
+ for (auto It = Begin; It != End; ++It) {
+ ImmutableCallSite CS(&*It);
+ if (CS) {
+ // A call-site with a swifterror argument is both use and def.
+ const Value *SwiftErrorAddr = nullptr;
+ for (auto &Arg : CS.args()) {
+ if (!Arg->isSwiftError())
+ continue;
+ // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+ SwiftErrorAddr = &*Arg;
+ assert(SwiftErrorAddr->isSwiftError() &&
+ "Must have a swifterror value argument");
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
+ &*It, FuncInfo->MBB, SwiftErrorAddr);
+ assert(CreatedReg);
+ }
+ if (!SwiftErrorAddr)
+ continue;
+
+ // Def of swifterror.
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) =
+ FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
+ assert(CreatedReg);
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
+
+ // A load is a use.
+ } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+ const Value *V = LI->getOperand(0);
+ if (!V->isSwiftError())
+ continue;
+
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) =
+ FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V);
+ assert(CreatedReg);
+
+ // A store is a def.
+ } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+ const Value *SwiftErrorAddr = SI->getOperand(1);
+ if (!SwiftErrorAddr->isSwiftError())
+ continue;
+
+ // Def of swifterror.
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) =
+ FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
+ assert(CreatedReg);
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
+
+ // A return in a swifterror-returning function is a use.
+ } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+ const Function *F = R->getParent()->getParent();
+ if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ continue;
+
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
+ R, FuncInfo->MBB, FuncInfo->SwiftErrorArg);
+ assert(CreatedReg);
+ }
+ }
+}
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
@@ -1384,6 +1459,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->startNewBlock();
unsigned NumFastIselRemaining = std::distance(Begin, End);
+
+ // Pre-assign swifterror vregs.
+ preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End);
+
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = &*std::prev(BI);
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 3a50aaa69985..008b984dd961 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -569,8 +569,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
// Greedy heuristic: Keep iterating keeping the best covering subreg index
// each time.
- LaneBitmask LanesLeft =
- LaneMask & ~(TRI.getSubRegIndexLaneMask(BestCover));
+ LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
while (LanesLeft.any()) {
unsigned BestIdx = 0;
int BestCover = INT_MIN;
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index acb3676fdd71..6bac39c7ee77 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -86,10 +86,134 @@ STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+// void bar(char *, int);
+// void foo(bool var) {
+// A: {
+// char z[4096];
+// bar(z, 0);
+// }
+//
+// char *p;
+// char x[4096];
+// char y[4096];
+// if (var) {
+// p = x;
+// } else {
+// bar(y, 1);
+// p = y + 1024;
+// }
+// B:
+// bar(p, 2);
+// }
+//
+// Naively compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` is used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So at no time are two
+// of the stack slots in use together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+// void foo(bool var) { // equivalent
+// char x[4096];
+// char *p;
+// bar(x, 0);
+// if (var) {
+// p = x;
+// } else {
+// bar(x, 1);
+// p = x + 1024;
+// }
+// bar(p, 2);
+// }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
//
// Implementation Notes:
// ---------------------
//
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter two calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+// lifetime intrinsic refers to that stack slot, in which case
+// it is marked as *in-scope*.
+// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+// the stack slot is overwritten with `undef`.
+// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+// L4) on function exit, all stack slots are marked as *out-of-scope*.
+// L5) `lifetime.end` is a no-op when called on a slot that is already
+// *out-of-scope*.
+// L6) memory accesses to *out-of-scope* stack slots are UB.
+// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+// are invalidated, unless the slot is "degenerate". This is used to
+// justify not marking slots as in-use until the pointer to them is
+// used, but feels a bit hacky in the presence of things like LICM. See
+// the "Degenerate Slots" section for more details.
+//
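+// For illustration, a hand-written IR sketch of rules L2/L3/L6 (intrinsic
+// signatures are from memory and may differ in detail):
+//
+//   %z = alloca [4096 x i8]
+//   %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %z, i64 0, i64 0
+//   call void @llvm.lifetime.start(i64 4096, i8* %p) ; `z` becomes in-scope
+//   call void @bar(i8* %p, i32 0)                    ; valid access
+//   call void @llvm.lifetime.end(i64 4096, i8* %p)   ; `z` is out-of-scope
+//   ; any later access through %p would be UB (L6)
+//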
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has live, non-undef contents
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergeable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference exactly, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between two stack slots, only in *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in-use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
+//
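+// As a pseudo-C++ sketch (names are illustrative; the actual check lives in
+// runOnMachineFunction below):
+//
+//   bool interfere(const LiveInterval &S, const LiveInterval &T,
+//                  ArrayRef<SlotIndex> StartsOfS,
+//                  ArrayRef<SlotIndex> StartsOfT) {
+//     // S and T can only be in-use together if one becomes in-use
+//     // while the other already is (or at the very same point).
+//     return S.isLiveAtIndexes(StartsOfT) || T.isLiveAtIndexes(StartsOfS);
+//   }
+//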
+// Exact first use:
+// ----------------
+//
// Consider the following motivating example:
//
// int foo() {
@@ -158,6 +282,9 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
// byte stack (better).
//
+// Degenerate Slots:
+// -----------------
+//
// Relying entirely on first-use of stack slots is problematic,
// however, due to the fact that optimizations can sometimes migrate
// uses of a variable outside of its lifetime start/end region. Here
@@ -237,10 +364,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
// for "b" then it will appear that 'b' has a degenerate lifetime.
//
-//===----------------------------------------------------------------------===//
-// StackColoring Pass
-//===----------------------------------------------------------------------===//
-
namespace {
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
@@ -271,8 +394,11 @@ class StackColoring : public MachineFunctionPass {
/// Maps basic blocks to a serial number.
SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
- /// Maps liveness intervals for each slot.
+ /// Maps slots to their use interval. Outside of this interval, a slot's
+ /// value is either dead or `undef`, and the slot will not be written to.
SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+ /// Maps slots to the points where they can become in-use.
+ SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
/// VNInfo is used for the construction of LiveIntervals.
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
@@ -672,15 +798,22 @@ void StackColoring::calculateLocalLiveness()
void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SmallVector<SlotIndex, 16> Starts;
- SmallVector<SlotIndex, 16> Finishes;
+ SmallVector<bool, 16> DefinitelyInUse;
// For each block, find which slots are active within this block
// and update the live intervals.
for (const MachineBasicBlock &MBB : *MF) {
Starts.clear();
Starts.resize(NumSlots);
- Finishes.clear();
- Finishes.resize(NumSlots);
+ DefinitelyInUse.clear();
+ DefinitelyInUse.resize(NumSlots);
+
+ // Start the interval of the slots that we previously found to be 'in-use'.
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+ for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+ pos = MBBLiveness.LiveIn.find_next(pos)) {
+ Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+ }
// Create the interval for the basic blocks containing lifetime begin/end.
for (const MachineInstr &MI : MBB) {
@@ -692,66 +825,35 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
for (auto Slot : slots) {
if (IsStart) {
- if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ // If a slot is already definitely in use, we don't have to emit
+ // a new start marker because one already exists.
+ if (!DefinitelyInUse[Slot]) {
+ LiveStarts[Slot].push_back(ThisIndex);
+ DefinitelyInUse[Slot] = true;
+ }
+ if (!Starts[Slot].isValid())
Starts[Slot] = ThisIndex;
} else {
- if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
- Finishes[Slot] = ThisIndex;
+ if (Starts[Slot].isValid()) {
+ VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+ Intervals[Slot]->addSegment(
+ LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+ Starts[Slot] = SlotIndex(); // Invalidate the start index
+ DefinitelyInUse[Slot] = false;
+ }
}
}
}
- // Create the interval of the blocks that we previously found to be 'alive'.
- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
- for (unsigned pos : MBBLiveness.LiveIn.set_bits()) {
- Starts[pos] = Indexes->getMBBStartIdx(&MBB);
- }
- for (unsigned pos : MBBLiveness.LiveOut.set_bits()) {
- Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
- }
-
+ // Close any segments that are still open at the end of the block.
for (unsigned i = 0; i < NumSlots; ++i) {
- //
- // When LifetimeStartOnFirstUse is turned on, data flow analysis
- // is forward (from starts to ends), not bidirectional. A
- // consequence of this is that we can wind up in situations
- // where Starts[i] is invalid but Finishes[i] is valid and vice
- // versa. Example:
- //
- // LIFETIME_START x
- // if (...) {
- // <use of x>
- // throw ...;
- // }
- // LIFETIME_END x
- // return 2;
- //
- //
- // Here the slot for "x" will not be live into the block
- // containing the "return 2" (since lifetimes start with first
- // use, not at the dominating LIFETIME_START marker).
- //
- if (Starts[i].isValid() && !Finishes[i].isValid()) {
- Finishes[i] = Indexes->getMBBEndIdx(&MBB);
- }
if (!Starts[i].isValid())
continue;
- assert(Starts[i] && Finishes[i] && "Invalid interval");
- VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
- SlotIndex S = Starts[i];
- SlotIndex F = Finishes[i];
- if (S < F) {
- // We have a single consecutive region.
- Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
- } else {
- // We have two non-consecutive regions. This happens when
- // LIFETIME_START appears after the LIFETIME_END marker.
- SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
- SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
- Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
- Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
- }
+ SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+ VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+ Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
}
}
}
@@ -981,6 +1083,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
BasicBlockNumbering.clear();
Markers.clear();
Intervals.clear();
+ LiveStarts.clear();
VNInfoAllocator.Reset();
unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -992,6 +1095,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
SmallVector<int, 8> SortedSlots;
SortedSlots.reserve(NumSlots);
Intervals.reserve(NumSlots);
+ LiveStarts.resize(NumSlots);
unsigned NumMarkers = collectMarkers(NumSlots);
@@ -1063,6 +1167,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
});
+ for (auto &s : LiveStarts)
+ std::sort(s.begin(), s.end());
+
bool Changed = true;
while (Changed) {
Changed = false;
@@ -1078,12 +1185,22 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
int SecondSlot = SortedSlots[J];
LiveInterval *First = &*Intervals[FirstSlot];
LiveInterval *Second = &*Intervals[SecondSlot];
+ auto &FirstS = LiveStarts[FirstSlot];
+ auto &SecondS = LiveStarts[SecondSlot];
assert (!First->empty() && !Second->empty() && "Found an empty range");
- // Merge disjoint slots.
- if (!First->overlaps(*Second)) {
+ // Merge disjoint slots. This is a little bit tricky - see the
+ // Implementation Notes section for an explanation.
+ if (!First->isLiveAtIndexes(SecondS) &&
+ !Second->isLiveAtIndexes(FirstS)) {
Changed = true;
First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+ int OldSize = FirstS.size();
+ FirstS.append(SecondS.begin(), SecondS.end());
+ auto Mid = FirstS.begin() + OldSize;
+ std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 581cfaf60755..e9d38c10c860 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -374,11 +374,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memcpy_element_unordered_atomic_1";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memcpy_element_unordered_atomic_2";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memcpy_element_unordered_atomic_4";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memcpy_element_unordered_atomic_8";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memcpy_element_unordered_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -781,22 +786,21 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
return UNKNOWN_LIBCALL;
}
-RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) {
+RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
switch (ElementSize) {
case 1:
- return MEMCPY_ELEMENT_ATOMIC_1;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
case 2:
- return MEMCPY_ELEMENT_ATOMIC_2;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
case 4:
- return MEMCPY_ELEMENT_ATOMIC_4;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
case 8:
- return MEMCPY_ELEMENT_ATOMIC_8;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
case 16:
- return MEMCPY_ELEMENT_ATOMIC_16;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
default:
return UNKNOWN_LIBCALL;
}
-
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a0c68e1dcce8..6922e33c8d6c 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -61,9 +61,11 @@
using namespace llvm;
using namespace dwarf;
-static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- unsigned &Version, unsigned &Flags,
+static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
StringRef &Section) {
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+
for (const auto &MFE: ModuleFlags) {
// Ignore flags with 'Require' behaviour.
if (MFE.Behavior == Module::Require)
@@ -88,14 +90,13 @@ static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
// ELF
//===----------------------------------------------------------------------===//
-void TargetLoweringObjectFileELF::emitModuleFlags(
- MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- const TargetMachine &TM) const {
+void TargetLoweringObjectFileELF::emitModuleMetadata(
+ MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
- GetObjCImageInfo(ModuleFlags, Version, Flags, Section);
+ GetObjCImageInfo(M, Version, Flags, Section);
if (Section.empty())
return;
@@ -618,20 +619,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
}
}
-/// emitModuleFlags - Perform code emission for module flags.
-void TargetLoweringObjectFileMachO::emitModuleFlags(
- MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- const TargetMachine &TM) const {
- MDNode *LinkerOptions = nullptr;
-
- for (const auto &MFE : ModuleFlags) {
- StringRef Key = MFE.Key->getString();
- if (Key == "Linker Options")
- LinkerOptions = cast<MDNode>(MFE.Val);
- }
-
+void TargetLoweringObjectFileMachO::emitModuleMetadata(
+ MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
// Emit the linker options if present.
- if (LinkerOptions) {
+ if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
for (const auto &Option : LinkerOptions->operands()) {
SmallVector<std::string, 4> StrOptions;
for (const auto &Piece : cast<MDNode>(Option)->operands())
@@ -643,7 +634,8 @@ void TargetLoweringObjectFileMachO::emitModuleFlags(
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
StringRef SectionVal;
- GetObjCImageInfo(ModuleFlags, VersionVal, ImageInfoFlags, SectionVal);
+
+ GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal);
// The section is mandatory. If we don't have it, then we don't have GC info.
if (SectionVal.empty())
@@ -1159,18 +1151,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
-void TargetLoweringObjectFileCOFF::emitModuleFlags(
- MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- const TargetMachine &TM) const {
- MDNode *LinkerOptions = nullptr;
-
- for (const auto &MFE : ModuleFlags) {
- StringRef Key = MFE.Key->getString();
- if (Key == "Linker Options")
- LinkerOptions = cast<MDNode>(MFE.Val);
- }
-
- if (LinkerOptions) {
+void TargetLoweringObjectFileCOFF::emitModuleMetadata(
+ MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
+ if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
// Emit the linker options to the linker .drectve section. According to the
// spec, this section is a space-separated string containing flags for
// linker.
@@ -1190,7 +1173,7 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags(
unsigned Flags = 0;
StringRef Section;
- GetObjCImageInfo(ModuleFlags, Version, Flags, Section);
+ GetObjCImageInfo(M, Version, Flags, Section);
if (Section.empty())
return;
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 2f9e8981b698..f916695a8439 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMDebugInfoCodeView
LazyRandomTypeCollection.cpp
Line.cpp
RecordSerialization.cpp
+ StringsAndChecksums.cpp
SymbolRecordMapping.cpp
SymbolDumper.cpp
SymbolSerializer.cpp
@@ -32,7 +33,7 @@ add_llvm_library(LLVMDebugInfoCodeView
TypeSerializer.cpp
TypeStreamMerger.cpp
TypeTableCollection.cpp
-
+
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/CodeView
)
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index 6e647c4b976b..de02525270c4 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -58,6 +58,10 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
uint32_t Begin = Writer.getOffset();
uint32_t End = Begin + StringSize;
+ // Write a null string at the beginning.
+ if (auto EC = Writer.writeCString(StringRef()))
+ return EC;
+
for (auto &Pair : Strings) {
StringRef S = Pair.getKey();
uint32_t Offset = Begin + Pair.getValue();
@@ -68,6 +72,7 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
}
Writer.setOffset(End);
+ assert((End - Begin) == StringSize);
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index e9124e68fe82..334c5e002bbc 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -50,7 +50,7 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; }
BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; }
DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
- std::unique_ptr<DebugSubsection> Subsection, CodeViewContainer Container)
+ std::shared_ptr<DebugSubsection> Subsection, CodeViewContainer Container)
: Subsection(std::move(Subsection)), Container(Container) {}
uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() {
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
index 8550107741ce..9b824333369b 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
@@ -26,40 +26,9 @@
using namespace llvm;
using namespace llvm::codeview;
-DebugSubsectionState::DebugSubsectionState() {}
-
-DebugSubsectionState::DebugSubsectionState(
- const DebugStringTableSubsectionRef &Strings)
- : Strings(&Strings) {}
-
-DebugSubsectionState::DebugSubsectionState(
- const DebugStringTableSubsectionRef &Strings,
- const DebugChecksumsSubsectionRef &Checksums)
- : Strings(&Strings), Checksums(&Checksums) {}
-
-void DebugSubsectionState::initializeStrings(const DebugSubsectionRecord &SR) {
- assert(SR.kind() == DebugSubsectionKind::StringTable);
- assert(!Strings && "Found a string table even though we already have one!");
-
- OwnedStrings = llvm::make_unique<DebugStringTableSubsectionRef>();
- consumeError(OwnedStrings->initialize(SR.getRecordData()));
- Strings = OwnedStrings.get();
-}
-
-void DebugSubsectionState::initializeChecksums(
- const DebugSubsectionRecord &FCR) {
- assert(FCR.kind() == DebugSubsectionKind::FileChecksums);
- if (Checksums)
- return;
-
- OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>();
- consumeError(OwnedChecksums->initialize(FCR.getRecordData()));
- Checksums = OwnedChecksums.get();
-}
-
-Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R,
- DebugSubsectionVisitor &V,
- const DebugSubsectionState &State) {
+Error llvm::codeview::visitDebugSubsection(
+ const DebugSubsectionRecord &R, DebugSubsectionVisitor &V,
+ const StringsAndChecksumsRef &State) {
BinaryStreamReader Reader(R.getRecordData());
switch (R.kind()) {
case DebugSubsectionKind::Lines: {
diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
new file mode 100644
index 000000000000..928bf8c94f73
--- /dev/null
+++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
@@ -0,0 +1,55 @@
+//===- StringsAndChecksums.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+StringsAndChecksumsRef::StringsAndChecksumsRef() {}
+
+StringsAndChecksumsRef::StringsAndChecksumsRef(
+ const DebugStringTableSubsectionRef &Strings)
+ : Strings(&Strings) {}
+
+StringsAndChecksumsRef::StringsAndChecksumsRef(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums)
+ : Strings(&Strings), Checksums(&Checksums) {}
+
+void StringsAndChecksumsRef::initializeStrings(
+ const DebugSubsectionRecord &SR) {
+ assert(SR.kind() == DebugSubsectionKind::StringTable);
+ assert(!Strings && "Found a string table even though we already have one!");
+
+ OwnedStrings = llvm::make_unique<DebugStringTableSubsectionRef>();
+ consumeError(OwnedStrings->initialize(SR.getRecordData()));
+ Strings = OwnedStrings.get();
+}
+
+void StringsAndChecksumsRef::setChecksums(
+ const DebugChecksumsSubsectionRef &CS) {
+ OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>();
+ *OwnedChecksums = CS;
+ Checksums = OwnedChecksums.get();
+}
+
+void StringsAndChecksumsRef::initializeChecksums(
+ const DebugSubsectionRecord &FCR) {
+ assert(FCR.kind() == DebugSubsectionKind::FileChecksums);
+ if (Checksums)
+ return;
+
+ OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>();
+ consumeError(OwnedChecksums->initialize(FCR.getRecordData()));
+ Checksums = OwnedChecksums.get();
+}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 66045933ce9b..36abafc079ed 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -212,7 +212,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FileStaticSym &FileStatic) {
DictScope S(W, "FileStatic");
- W.printNumber("Index", FileStatic.Index);
+ printTypeIndex("Index", FileStatic.Index);
W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset);
W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames());
W.printString("Name", FileStatic.Name);
@@ -516,7 +516,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
RegisterSym &Register) {
DictScope S(W, "RegisterSym");
- W.printNumber("Type", Register.Index);
+ printTypeIndex("Type", Register.Index);
W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames());
W.printString("Name", Register.Name);
return Error::success();
@@ -524,7 +524,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) {
DictScope S(W, "PublicSym");
- W.printNumber("Type", Public.Index);
+ printTypeIndex("Type", Public.Index);
W.printNumber("Seg", Public.Segment);
W.printNumber("Off", Public.Offset);
W.printString("Name", Public.Name);
@@ -631,7 +631,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
W.printHex("Offset", RegRel.Offset);
printTypeIndex("Type", RegRel.Type);
- W.printHex("Register", RegRel.Register);
+ W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames());
W.printString("VarName", RegRel.Name);
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index ea46841a70f6..d731dc1b0a37 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -307,7 +307,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
error(IO.mapInteger(FrameCookie.CodeOffset));
error(IO.mapInteger(FrameCookie.Register));
- error(IO.mapInteger(FrameCookie.CookieKind));
+ error(IO.mapEnum(FrameCookie.CookieKind));
error(IO.mapInteger(FrameCookie.Flags));
return Error::success();
@@ -439,7 +439,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
error(IO.mapInteger(RegRel.Offset));
error(IO.mapInteger(RegRel.Type));
- error(IO.mapInteger(RegRel.Register));
+ error(IO.mapEnum(RegRel.Register));
error(IO.mapStringZ(RegRel.Name));
return Error::success();
diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp
index af05d2dc294b..08f848b36a9d 100644
--- a/lib/DebugInfo/CodeView/TypeDatabase.cpp
+++ b/lib/DebugInfo/CodeView/TypeDatabase.cpp
@@ -12,59 +12,6 @@
using namespace llvm;
using namespace llvm::codeview;
-namespace {
-struct SimpleTypeEntry {
- StringRef Name;
- SimpleTypeKind Kind;
-};
-}
-
-/// The names here all end in "*". If the simple type is a pointer type, we
-/// return the whole name. Otherwise we lop off the last character in our
-/// StringRef.
-static const SimpleTypeEntry SimpleTypeNames[] = {
- {"void*", SimpleTypeKind::Void},
- {"<not translated>*", SimpleTypeKind::NotTranslated},
- {"HRESULT*", SimpleTypeKind::HResult},
- {"signed char*", SimpleTypeKind::SignedCharacter},
- {"unsigned char*", SimpleTypeKind::UnsignedCharacter},
- {"char*", SimpleTypeKind::NarrowCharacter},
- {"wchar_t*", SimpleTypeKind::WideCharacter},
- {"char16_t*", SimpleTypeKind::Character16},
- {"char32_t*", SimpleTypeKind::Character32},
- {"__int8*", SimpleTypeKind::SByte},
- {"unsigned __int8*", SimpleTypeKind::Byte},
- {"short*", SimpleTypeKind::Int16Short},
- {"unsigned short*", SimpleTypeKind::UInt16Short},
- {"__int16*", SimpleTypeKind::Int16},
- {"unsigned __int16*", SimpleTypeKind::UInt16},
- {"long*", SimpleTypeKind::Int32Long},
- {"unsigned long*", SimpleTypeKind::UInt32Long},
- {"int*", SimpleTypeKind::Int32},
- {"unsigned*", SimpleTypeKind::UInt32},
- {"__int64*", SimpleTypeKind::Int64Quad},
- {"unsigned __int64*", SimpleTypeKind::UInt64Quad},
- {"__int64*", SimpleTypeKind::Int64},
- {"unsigned __int64*", SimpleTypeKind::UInt64},
- {"__int128*", SimpleTypeKind::Int128},
- {"unsigned __int128*", SimpleTypeKind::UInt128},
- {"__half*", SimpleTypeKind::Float16},
- {"float*", SimpleTypeKind::Float32},
- {"float*", SimpleTypeKind::Float32PartialPrecision},
- {"__float48*", SimpleTypeKind::Float48},
- {"double*", SimpleTypeKind::Float64},
- {"long double*", SimpleTypeKind::Float80},
- {"__float128*", SimpleTypeKind::Float128},
- {"_Complex float*", SimpleTypeKind::Complex32},
- {"_Complex double*", SimpleTypeKind::Complex64},
- {"_Complex long double*", SimpleTypeKind::Complex80},
- {"_Complex __float128*", SimpleTypeKind::Complex128},
- {"bool*", SimpleTypeKind::Boolean8},
- {"__bool16*", SimpleTypeKind::Boolean16},
- {"__bool32*", SimpleTypeKind::Boolean32},
- {"__bool64*", SimpleTypeKind::Boolean64},
-};
-
TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) {
CVUDTNames.resize(Capacity);
TypeRecords.resize(Capacity);
@@ -103,22 +50,8 @@ StringRef TypeDatabase::saveTypeName(StringRef TypeName) {
}
StringRef TypeDatabase::getTypeName(TypeIndex Index) const {
- if (Index.isNoneType())
- return "<no type>";
-
- if (Index.isSimple()) {
- // This is a simple type.
- for (const auto &SimpleTypeName : SimpleTypeNames) {
- if (SimpleTypeName.Kind == Index.getSimpleKind()) {
- if (Index.getSimpleMode() == SimpleTypeMode::Direct)
- return SimpleTypeName.Name.drop_back(1);
- // Otherwise, this is a pointer type. We gloss over the distinction
- // between near, far, 64, 32, etc, and just give a pointer type.
- return SimpleTypeName.Name;
- }
- }
- return "<unknown simple type>";
- }
+ if (Index.isNoneType() || Index.isSimple())
+ return TypeIndex::simpleTypeName(Index);
if (contains(Index))
return CVUDTNames[Index.toArrayIndex()];
diff --git a/lib/DebugInfo/CodeView/TypeIndex.cpp b/lib/DebugInfo/CodeView/TypeIndex.cpp
index 20ba6470cd5b..24fe5fcb28d4 100644
--- a/lib/DebugInfo/CodeView/TypeIndex.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -15,11 +15,88 @@
using namespace llvm;
using namespace llvm::codeview;
+namespace {
+struct SimpleTypeEntry {
+ StringRef Name;
+ SimpleTypeKind Kind;
+};
+
+/// The names here all end in "*". If the simple type is a pointer type, we
+/// return the whole name. Otherwise we lop off the last character in our
+/// StringRef.
+static const SimpleTypeEntry SimpleTypeNames[] = {
+ {"void*", SimpleTypeKind::Void},
+ {"<not translated>*", SimpleTypeKind::NotTranslated},
+ {"HRESULT*", SimpleTypeKind::HResult},
+ {"signed char*", SimpleTypeKind::SignedCharacter},
+ {"unsigned char*", SimpleTypeKind::UnsignedCharacter},
+ {"char*", SimpleTypeKind::NarrowCharacter},
+ {"wchar_t*", SimpleTypeKind::WideCharacter},
+ {"char16_t*", SimpleTypeKind::Character16},
+ {"char32_t*", SimpleTypeKind::Character32},
+ {"__int8*", SimpleTypeKind::SByte},
+ {"unsigned __int8*", SimpleTypeKind::Byte},
+ {"short*", SimpleTypeKind::Int16Short},
+ {"unsigned short*", SimpleTypeKind::UInt16Short},
+ {"__int16*", SimpleTypeKind::Int16},
+ {"unsigned __int16*", SimpleTypeKind::UInt16},
+ {"long*", SimpleTypeKind::Int32Long},
+ {"unsigned long*", SimpleTypeKind::UInt32Long},
+ {"int*", SimpleTypeKind::Int32},
+ {"unsigned*", SimpleTypeKind::UInt32},
+ {"__int64*", SimpleTypeKind::Int64Quad},
+ {"unsigned __int64*", SimpleTypeKind::UInt64Quad},
+ {"__int64*", SimpleTypeKind::Int64},
+ {"unsigned __int64*", SimpleTypeKind::UInt64},
+ {"__int128*", SimpleTypeKind::Int128},
+ {"unsigned __int128*", SimpleTypeKind::UInt128},
+ {"__half*", SimpleTypeKind::Float16},
+ {"float*", SimpleTypeKind::Float32},
+ {"float*", SimpleTypeKind::Float32PartialPrecision},
+ {"__float48*", SimpleTypeKind::Float48},
+ {"double*", SimpleTypeKind::Float64},
+ {"long double*", SimpleTypeKind::Float80},
+ {"__float128*", SimpleTypeKind::Float128},
+ {"_Complex float*", SimpleTypeKind::Complex32},
+ {"_Complex double*", SimpleTypeKind::Complex64},
+ {"_Complex long double*", SimpleTypeKind::Complex80},
+ {"_Complex __float128*", SimpleTypeKind::Complex128},
+ {"bool*", SimpleTypeKind::Boolean8},
+ {"__bool16*", SimpleTypeKind::Boolean16},
+ {"__bool32*", SimpleTypeKind::Boolean32},
+ {"__bool64*", SimpleTypeKind::Boolean64},
+};
+} // namespace
+
+StringRef TypeIndex::simpleTypeName(TypeIndex TI) {
+ assert(TI.isNoneType() || TI.isSimple());
+
+ if (TI.isNoneType())
+ return "<no type>";
+
+ // This is a simple type.
+ for (const auto &SimpleTypeName : SimpleTypeNames) {
+ if (SimpleTypeName.Kind == TI.getSimpleKind()) {
+ if (TI.getSimpleMode() == SimpleTypeMode::Direct)
+ return SimpleTypeName.Name.drop_back(1);
+ // Otherwise, this is a pointer type. We gloss over the distinction
+ // between near, far, 64, 32, etc, and just give a pointer type.
+ return SimpleTypeName.Name;
+ }
+ }
+ return "<unknown simple type>";
+}
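+
+// For illustration, assuming TypeIndex's (Kind, Mode) constructor: a direct
+// mode index drops the trailing '*' from the table above, while any pointer
+// mode keeps it.
+//
+//   simpleTypeName(TypeIndex(SimpleTypeKind::Int32));          // "int"
+//   simpleTypeName(TypeIndex(SimpleTypeKind::Int32,
+//                            SimpleTypeMode::NearPointer));    // "int*"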
+
void llvm::codeview::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName,
TypeIndex TI, TypeCollection &Types) {
StringRef TypeName;
- if (!TI.isNoneType())
- TypeName = Types.getTypeName(TI);
+ if (!TI.isNoneType()) {
+ if (TI.isSimple())
+ TypeName = TypeIndex::simpleTypeName(TI);
+ else
+ TypeName = Types.getTypeName(TI);
+ }
+
if (!TypeName.empty())
Printer.printHex(FieldName, TypeName, TI.getIndex());
else
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 11e2e215303c..8704cea60786 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -34,7 +34,7 @@ static inline PointerMode getPointerMode(uint32_t Attrs) {
static inline bool isMemberPointer(uint32_t Attrs) {
PointerMode Mode = getPointerMode(Attrs);
return Mode == PointerMode::PointerToDataMember ||
- Mode == PointerMode::PointerToDataMember;
+ Mode == PointerMode::PointerToMemberFunction;
}
static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) {
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 97b52f0fbdd6..87009bf1b6a1 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -55,6 +55,13 @@ bool DWARFAcceleratorTable::extract() {
return true;
}
+uint32_t DWARFAcceleratorTable::getNumBuckets() { return Hdr.NumBuckets; }
+uint32_t DWARFAcceleratorTable::getNumHashes() { return Hdr.NumHashes; }
+uint32_t DWARFAcceleratorTable::getSizeHdr() { return sizeof(Hdr); }
+uint32_t DWARFAcceleratorTable::getHeaderDataLength() {
+ return Hdr.HeaderDataLength;
+}
+
LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
// Dump the header.
OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n'
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 42ab48808f9a..9bafcde57f0a 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -425,248 +425,6 @@ DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) {
return DWARFDie();
}
-namespace {
-
-class Verifier {
- raw_ostream &OS;
- DWARFContext &DCtx;
-public:
- Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {}
-
- bool HandleDebugInfo() {
- bool Success = true;
- // A map that tracks all references (converted absolute references) so we
- // can verify each reference points to a valid DIE and not an offset that
- // lies between to valid DIEs.
- std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
-
- OS << "Verifying .debug_info...\n";
- for (const auto &CU : DCtx.compile_units()) {
- unsigned NumDies = CU->getNumDIEs();
- for (unsigned I = 0; I < NumDies; ++I) {
- auto Die = CU->getDIEAtIndex(I);
- const auto Tag = Die.getTag();
- if (Tag == DW_TAG_null)
- continue;
- for (auto AttrValue : Die.attributes()) {
- const auto Attr = AttrValue.Attr;
- const auto Form = AttrValue.Value.getForm();
- switch (Attr) {
- case DW_AT_ranges:
- // Make sure the offset in the DW_AT_ranges attribute is valid.
- if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
- if (*SectionOffset >= DCtx.getRangeSection().Data.size()) {
- Success = false;
- OS << "error: DW_AT_ranges offset is beyond .debug_ranges "
- "bounds:\n";
- Die.dump(OS, 0);
- OS << "\n";
- }
- } else {
- Success = false;
- OS << "error: DIE has invalid DW_AT_ranges encoding:\n";
- Die.dump(OS, 0);
- OS << "\n";
- }
- break;
- case DW_AT_stmt_list:
- // Make sure the offset in the DW_AT_stmt_list attribute is valid.
- if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
- if (*SectionOffset >= DCtx.getLineSection().Data.size()) {
- Success = false;
- OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
- "bounds: "
- << format("0x%08" PRIx32, *SectionOffset) << "\n";
- CU->getUnitDIE().dump(OS, 0);
- OS << "\n";
- }
- } else {
- Success = false;
- OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n";
- Die.dump(OS, 0);
- OS << "\n";
- }
- break;
-
- default:
- break;
- }
- switch (Form) {
- case DW_FORM_ref1:
- case DW_FORM_ref2:
- case DW_FORM_ref4:
- case DW_FORM_ref8:
- case DW_FORM_ref_udata: {
- // Verify all CU relative references are valid CU offsets.
- Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
- assert(RefVal);
- if (RefVal) {
- auto DieCU = Die.getDwarfUnit();
- auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
- auto CUOffset = AttrValue.Value.getRawUValue();
- if (CUOffset >= CUSize) {
- Success = false;
- OS << "error: " << FormEncodingString(Form) << " CU offset "
- << format("0x%08" PRIx32, CUOffset)
- << " is invalid (must be less than CU size of "
- << format("0x%08" PRIx32, CUSize) << "):\n";
- Die.dump(OS, 0);
- OS << "\n";
- } else {
- // Valid reference, but we will verify it points to an actual
- // DIE later.
- ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
- }
- }
- break;
- }
- case DW_FORM_ref_addr: {
- // Verify all absolute DIE references have valid offsets in the
- // .debug_info section.
- Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
- assert(RefVal);
- if (RefVal) {
- if(*RefVal >= DCtx.getInfoSection().Data.size()) {
- Success = false;
- OS << "error: DW_FORM_ref_addr offset beyond .debug_info "
- "bounds:\n";
- Die.dump(OS, 0);
- OS << "\n";
- } else {
- // Valid reference, but we will verify it points to an actual
- // DIE later.
- ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
- }
- }
- break;
- }
- case DW_FORM_strp: {
- auto SecOffset = AttrValue.Value.getAsSectionOffset();
- assert(SecOffset); // DW_FORM_strp is a section offset.
- if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) {
- Success = false;
- OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
- Die.dump(OS, 0);
- OS << "\n";
- }
- break;
- }
- default:
- break;
- }
- }
- }
- }
-
-  // Take all references and make sure they point to an actual DIE by
-  // getting the DIE by offset, emitting an error for any that do not resolve.
- OS << "Verifying .debug_info references...\n";
- for (auto Pair: ReferenceToDIEOffsets) {
- auto Die = DCtx.getDIEForOffset(Pair.first);
- if (Die)
- continue;
- Success = false;
- OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
- << ". Offset is in between DIEs:\n";
- for (auto Offset: Pair.second) {
- auto ReferencingDie = DCtx.getDIEForOffset(Offset);
- ReferencingDie.dump(OS, 0);
- OS << "\n";
- }
- OS << "\n";
- }
- return Success;
- }
-
- bool HandleDebugLine() {
- std::map<uint64_t, DWARFDie> StmtListToDie;
- bool Success = true;
- OS << "Verifying .debug_line...\n";
- for (const auto &CU : DCtx.compile_units()) {
- uint32_t LineTableOffset = 0;
- auto CUDie = CU->getUnitDIE();
- auto StmtFormValue = CUDie.find(DW_AT_stmt_list);
- if (!StmtFormValue) {
- // No line table for this compile unit.
- continue;
- }
- // Get the attribute value as a section offset. No need to produce an
- // error here if the encoding isn't correct because we validate this in
- // the .debug_info verifier.
- if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) {
- LineTableOffset = *StmtSectionOffset;
- if (LineTableOffset >= DCtx.getLineSection().Data.size()) {
- // Make sure we don't get a valid line table back if the offset
- // is wrong.
- assert(DCtx.getLineTableForUnit(CU.get()) == nullptr);
- // Skip this line table as it isn't valid. No need to create an error
- // here because we validate this in the .debug_info verifier.
- continue;
- } else {
- auto Iter = StmtListToDie.find(LineTableOffset);
- if (Iter != StmtListToDie.end()) {
- Success = false;
- OS << "error: two compile unit DIEs, "
- << format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
- << format("0x%08" PRIx32, CUDie.getOffset())
- << ", have the same DW_AT_stmt_list section offset:\n";
- Iter->second.dump(OS, 0);
- CUDie.dump(OS, 0);
- OS << '\n';
- // Already verified this line table before, no need to do it again.
- continue;
- }
- StmtListToDie[LineTableOffset] = CUDie;
- }
- }
- auto LineTable = DCtx.getLineTableForUnit(CU.get());
- if (!LineTable) {
- Success = false;
- OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
- << "] was not able to be parsed for CU:\n";
- CUDie.dump(OS, 0);
- OS << '\n';
- continue;
- }
- uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size();
- uint64_t PrevAddress = 0;
- uint32_t RowIndex = 0;
- for (const auto &Row : LineTable->Rows) {
- if (Row.Address < PrevAddress) {
- Success = false;
- OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
- << "] row[" << RowIndex
- << "] decreases in address from previous row:\n";
-
- DWARFDebugLine::Row::dumpTableHeader(OS);
- if (RowIndex > 0)
- LineTable->Rows[RowIndex - 1].dump(OS);
- Row.dump(OS);
- OS << '\n';
- }
-
- if (Row.File > MaxFileIndex) {
- Success = false;
- OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
- << "][" << RowIndex << "] has invalid file index " << Row.File
- << " (valid values are [1," << MaxFileIndex << "]):\n";
- DWARFDebugLine::Row::dumpTableHeader(OS);
- Row.dump(OS);
- OS << '\n';
- }
- if (Row.EndSequence)
- PrevAddress = 0;
- else
- PrevAddress = Row.Address;
- ++RowIndex;
- }
- }
- return Success;
- }
-};
-
-} // anonymous namespace
-
bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) {
bool Success = true;
DWARFVerifier verifier(OS, *this);
@@ -678,8 +436,13 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) {
if (!verifier.handleDebugLine())
Success = false;
}
+ if (DumpType == DIDT_All || DumpType == DIDT_AppleNames) {
+ if (!verifier.handleAppleNames())
+ Success = false;
+ }
return Success;
}
+
const DWARFUnitIndex &DWARFContext::getCUIndex() {
if (CUIndex)
return *CUIndex;
@@ -1250,7 +1013,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
continue;
RelSecName = RelSecName.substr(
- RelSecName.find_first_not_of("._")); // Skip . and _ prefixes.
+ RelSecName.find_first_not_of("._z")); // Skip . and _ prefixes.
// TODO: Add support for relocations in other sections as needed.
// Record relocations for the debug_info and debug_line sections.
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index e6e007896cc8..cf9fec2b3254 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -514,6 +514,20 @@ static uint64_t readPointer(const DataExtractor &Data, uint32_t &Offset,
}
}
+// This is a workaround for old compilers that do not allow the noreturn
+// attribute in lambdas. Once support for those compilers is phased out, we
+// can remove this and go back to a ReportError lambda:
+// [StartOffset](const char *ErrorMsg).
+#define ReportError(ErrorMsg) ReportErrorImpl(StartOffset, ErrorMsg)
+static void LLVM_ATTRIBUTE_NORETURN
+ReportErrorImpl(uint32_t StartOffset, const char *ErrorMsg) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << format(ErrorMsg, StartOffset);
+ OS.flush();
+ report_fatal_error(Str);
+}
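+
+// For illustration, a call site below such as
+//   ReportError("Unknown augmentation character in entry at %lx");
+// expands to
+//   ReportErrorImpl(StartOffset, "Unknown augmentation character in entry at %lx");
+// so each use site must have a uint32_t StartOffset in scope, exactly as the
+// old lambda capture did.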
+
void DWARFDebugFrame::parse(DataExtractor Data) {
uint32_t Offset = 0;
DenseMap<uint32_t, CIE *> CIEs;
@@ -521,14 +535,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
while (Data.isValidOffset(Offset)) {
uint32_t StartOffset = Offset;
- auto ReportError = [StartOffset](const char *ErrorMsg) {
- std::string Str;
- raw_string_ostream OS(Str);
- OS << format(ErrorMsg, StartOffset);
- OS.flush();
- report_fatal_error(Str);
- };
-
bool IsDWARF64 = false;
uint64_t Length = Data.getU32(&Offset);
uint64_t Id;
@@ -585,7 +591,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
switch (AugmentationString[i]) {
default:
ReportError("Unknown augmentation character in entry at %lx");
- llvm_unreachable("ReportError should not return.");
case 'L':
LSDAPointerEncoding = Data.getU8(&Offset);
break;
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 8a544296f65c..a6240fb60143 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -14,6 +14,7 @@
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <set>
@@ -275,3 +276,36 @@ bool DWARFVerifier::handleDebugLine() {
verifyDebugLineRows();
return NumDebugLineErrors == 0;
}
+
+bool DWARFVerifier::handleAppleNames() {
+ NumAppleNamesErrors = 0;
+ OS << "Verifying .apple_names...\n";
+
+ DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data,
+ DCtx.isLittleEndian(), 0);
+ DataExtractor StrData(DCtx.getStringSection(), DCtx.isLittleEndian(), 0);
+ DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData,
+ DCtx.getAppleNamesSection().Relocs);
+
+ if (!AppleNames.extract()) {
+ OS << "error: cannot extract .apple_names accelerator table\n";
+ return false;
+ }
+
+ // Verify that all buckets have a valid hash index or are empty
+ uint32_t NumBuckets = AppleNames.getNumBuckets();
+ uint32_t NumHashes = AppleNames.getNumHashes();
+
+ uint32_t BucketsOffset =
+ AppleNames.getSizeHdr() + AppleNames.getHeaderDataLength();
+
+ for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
+ uint32_t HashIdx = AppleNamesSection.getU32(&BucketsOffset);
+ if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
+      OS << format("error: Bucket[%u] has invalid hash index: [%u]\n",
+                   BucketIdx, HashIdx);
+ ++NumAppleNamesErrors;
+ }
+ }
+ return NumAppleNamesErrors == 0;
+}
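+
+// Layout assumed by the bucket scan above: the fixed header is followed by
+// the header data, then NumBuckets 32-bit hash indices. For example, with a
+// hypothetical getSizeHdr() of 20 and getHeaderDataLength() of 12, Bucket[0]
+// is read at section offset 32, Bucket[1] at 36, and so on; UINT32_MAX marks
+// an empty bucket.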
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 396dffaa68b1..81a9d3eeec61 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -177,7 +177,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
}
void DbiModuleDescriptorBuilder::addDebugSubsection(
- std::unique_ptr<DebugSubsection> Subsection) {
+ std::shared_ptr<DebugSubsection> Subsection) {
assert(Subsection);
C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
std::move(Subsection), CodeViewContainer::Pdb));
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 355c7b57f4d1..e7304b444f23 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -45,10 +45,6 @@ void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; }
void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; }
-void DbiStreamBuilder::setSectionContribs(ArrayRef<SectionContrib> Arr) {
- SectionContribs = Arr;
-}
-
void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) {
SectionMap = SecMap;
}
@@ -293,23 +289,17 @@ static uint16_t toSecMapFlags(uint32_t Flags) {
return Ret;
}
-// A utility function to create Section Contributions
-// for a given input sections.
-std::vector<SectionContrib> DbiStreamBuilder::createSectionContribs(
- ArrayRef<object::coff_section> SecHdrs) {
- std::vector<SectionContrib> Ret;
-
- // Create a SectionContrib for each input section.
- for (auto &Sec : SecHdrs) {
- Ret.emplace_back();
- auto &Entry = Ret.back();
- memset(&Entry, 0, sizeof(Entry));
-
- Entry.Off = Sec.PointerToRawData;
- Entry.Size = Sec.SizeOfRawData;
- Entry.Characteristics = Sec.Characteristics;
- }
- return Ret;
+void DbiStreamBuilder::addSectionContrib(DbiModuleDescriptorBuilder *ModuleDbi,
+ const object::coff_section *SecHdr) {
+ SectionContrib SC;
+ memset(&SC, 0, sizeof(SC));
+ SC.ISect = (uint16_t)~0U; // This represents nil.
+ SC.Off = SecHdr->PointerToRawData;
+ SC.Size = SecHdr->SizeOfRawData;
+ SC.Characteristics = SecHdr->Characteristics;
+ // Use the module index in the module dbi stream or nil (-1).
+ SC.Imod = ModuleDbi ? ModuleDbi->getModuleIndex() : (uint16_t)~0U;
+ SectionContribs.emplace_back(SC);
}
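+
+// A usage sketch (hypothetical names): a producer walks the output section
+// headers and records one contribution per section, e.g.
+//
+//   for (const object::coff_section &Sec : SectionHeaders)
+//     DbiBuilder.addSectionContrib(ModDbiOrNull, &Sec);
+//
+// passing nullptr when no module owns the section, which records the nil
+// module index (0xFFFF) chosen above.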
// A utility function to create a Section Map for a given list of COFF sections.
@@ -372,7 +362,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (!SectionContribs.empty()) {
if (auto EC = Writer.writeEnum(DbiSecContribVer60))
return EC;
- if (auto EC = Writer.writeArray(SectionContribs))
+ if (auto EC = Writer.writeArray(makeArrayRef(SectionContribs)))
return EC;
}
diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp
index 7c6069652da6..a3979d480bf4 100644
--- a/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -102,6 +102,10 @@ InfoStream::named_streams() const {
return NamedStreams.entries();
}
+bool InfoStream::containsIdStream() const {
+ return !!(Features & PdbFeatureContainsIdStream);
+}
+
PdbRaw_ImplVer InfoStream::getVersion() const {
return static_cast<PdbRaw_ImplVer>(Version);
}
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 1254e23c73eb..a9597cdf4c4d 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -363,6 +363,16 @@ Expected<PDBStringTable &> PDBFile::getStringTable() {
return *Strings;
}
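+// A note on the mapping below: only PDB_Machine::Amd64 is treated as 64-bit,
+// every other machine type yields 4, and a missing DBI stream yields 0 so
+// callers can tell "unknown" apart from a real pointer size.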
+uint32_t PDBFile::getPointerSize() {
+ auto DbiS = getPDBDbiStream();
+ if (!DbiS)
+ return 0;
+ PDB_Machine Machine = DbiS->getMachineType();
+ if (Machine == PDB_Machine::Amd64)
+ return 8;
+ return 4;
+}
+
bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
bool PDBFile::hasPDBGlobalsStream() {
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 2c6465e6fb2a..12b0c3b36c1d 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -80,6 +80,16 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
}
Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
+
+ if (Ipi && Ipi->getRecordCount() > 0) {
+    // In theory newer PDBs always have an ID stream, but by only treating the
+    // ID stream as really present when it contains at least one ID record, we
+    // leave open the opportunity to test older PDBs that lack one.
+ auto &Info = getInfoBuilder();
+ Info.addFeature(PdbRaw_FeatureSig::VC140);
+ }
+
uint32_t StringsLen = Strings.calculateSerializedSize();
if (auto EC = addNamedStream("/names", StringsLen))
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index 6013c342cf02..f9f8ac219d35 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -56,7 +56,8 @@ Error PDBStringTable::readStrings(BinaryStreamReader &Reader) {
return Error::success();
}
-codeview::DebugStringTableSubsectionRef PDBStringTable::getStringTable() const {
+const codeview::DebugStringTableSubsectionRef &
+PDBStringTable::getStringTable() const {
return Strings;
}
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index a472181a4895..90acfadd311f 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -52,6 +52,11 @@ uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
return Size;
}
+void PDBStringTableBuilder::setStrings(
+ const codeview::DebugStringTableSubsection &Strings) {
+ this->Strings = Strings;
+}
+
Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
// Write a header
PDBStringTableHeader H;
diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index 091ac67035dc..8f3474b9ce19 100644
--- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -130,4 +130,13 @@ PublicsStream::getSymbols(bool *HadError) const {
return SS.getSymbols(HadError);
}
+Expected<const codeview::CVSymbolArray &>
+PublicsStream::getSymbolArray() const {
+ auto SymbolS = Pdb.getPDBSymbolStream();
+ if (!SymbolS)
+ return SymbolS.takeError();
+
+ return SymbolS->getSymbolArray();
+}
+
Error PublicsStream::commit() { return Error::success(); }
diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 16904a5a27ed..91b8d648fcf9 100644
--- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index aacefae80c3a..da353cb6977c 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -181,13 +181,14 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
if (Data->getDataKind() == PDB_DataKind::Member)
Members.push_back(std::move(Data));
else
- Other.push_back(std::move(Child));
+ Other.push_back(std::move(Data));
} else if (auto VT = unique_dyn_cast<PDBSymbolTypeVTable>(Child))
VTables.push_back(std::move(VT));
else if (auto Func = unique_dyn_cast<PDBSymbolFunc>(Child))
Funcs.push_back(std::move(Func));
- else
+ else {
Other.push_back(std::move(Child));
+ }
}
// We don't want to have any re-allocations in the list of bases, so make
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 9aad3771784d..0453a7f443b5 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -553,12 +553,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs);
const size_t kMaxSaneLen = 1 << 20;
- const size_t kMinDefaultLen = 64;
+ const size_t kMinDefaultLen = 4096;
FuzzingOptions Options;
Options.Verbosity = Flags.verbosity;
Options.MaxLen = Flags.max_len;
Options.ExperimentalLenControl = Flags.experimental_len_control;
- if (Flags.experimental_len_control && Flags.max_len == 64)
+ if (Flags.experimental_len_control && Flags.max_len == kMinDefaultLen)
Options.MaxLen = 1 << 20;
Options.UnitTimeoutSec = Flags.timeout;
Options.ErrorExitCode = Flags.error_exitcode;
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index f6083282ab61..fbf18357ede6 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -301,7 +301,9 @@ void Fuzzer::SetMaxInputLen(size_t MaxInputLen) {
this->MaxInputLen = MaxInputLen;
this->MaxMutationLen = MaxInputLen;
AllocateCurrentUnitData();
- Printf("INFO: -max_len is not provided, using %zd\n", MaxInputLen);
+ Printf("INFO: -max_len is not provided; "
+ "libFuzzer will not generate inputs larger than %zd bytes\n",
+ MaxInputLen);
}
void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) {
diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp
index ea93468ea0ed..6f5c7be41062 100644
--- a/lib/Fuzzer/FuzzerTracePC.cpp
+++ b/lib/Fuzzer/FuzzerTracePC.cpp
@@ -53,6 +53,17 @@ size_t TracePC::GetTotalPCCoverage() {
return Res;
}
+
+void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) {
+ if (Start == Stop) return;
+ if (NumModulesWithInline8bitCounters &&
+ ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return;
+ assert(NumModulesWithInline8bitCounters <
+ sizeof(ModuleCounters) / sizeof(ModuleCounters[0]));
+ ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop};
+ NumInline8bitCounters += Stop - Start;
+}
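+
+// For illustration: with -fsanitize-coverage=inline-8bit-counters, each
+// instrumented module is expected to register its counter range once at
+// startup via the __sanitizer_cov_8bit_counters_init entry point defined
+// later in this file, which forwards here; re-registration of the same Start
+// is ignored by the check above.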
+
void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) {
if (Start == Stop || *Start) return;
assert(NumModules < sizeof(Modules) / sizeof(Modules[0]));
@@ -76,6 +87,13 @@ void TracePC::PrintModuleInfo() {
for (size_t i = 0; i < NumModules; i++)
Printf("[%p, %p), ", Modules[i].Start, Modules[i].Stop);
Printf("\n");
+ if (NumModulesWithInline8bitCounters) {
+ Printf("INFO: Loaded %zd modules with %zd inline 8-bit counters\n",
+ NumModulesWithInline8bitCounters, NumInline8bitCounters);
+ for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++)
+ Printf("[%p, %p), ", ModuleCounters[i].Start, ModuleCounters[i].Stop);
+ Printf("\n");
+ }
}
ATTRIBUTE_NO_SANITIZE_ALL
@@ -304,6 +322,11 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) {
}
ATTRIBUTE_INTERFACE
+void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) {
+ fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop);
+}
+
+ATTRIBUTE_INTERFACE
ATTRIBUTE_NO_SANITIZE_ALL
void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) {
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
diff --git a/lib/Fuzzer/FuzzerTracePC.h b/lib/Fuzzer/FuzzerTracePC.h
index 6523fa06005c..5ec8c590b4df 100644
--- a/lib/Fuzzer/FuzzerTracePC.h
+++ b/lib/Fuzzer/FuzzerTracePC.h
@@ -51,7 +51,8 @@ class TracePC {
// How many bits of PC are used from __sanitizer_cov_trace_pc.
static const size_t kTracePcBits = 18;
- void HandleInit(uint32_t *start, uint32_t *stop);
+ void HandleInit(uint32_t *Start, uint32_t *Stop);
+ void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop);
void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee);
template <class T> void HandleCmp(uintptr_t PC, T Arg1, T Arg2);
size_t GetTotalPCCoverage();
@@ -104,6 +105,10 @@ private:
size_t NumModules; // linker-initialized.
size_t NumGuards; // linker-initialized.
+ struct { uint8_t *Start, *Stop; } ModuleCounters[4096];
+ size_t NumModulesWithInline8bitCounters; // linker-initialized.
+ size_t NumInline8bitCounters;
+
uint8_t *Counters() const;
uintptr_t *PCs() const;
@@ -118,12 +123,24 @@ void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End,
size_t FirstFeature, Callback Handle8bitCounter) {
typedef uintptr_t LargeType;
const size_t Step = sizeof(LargeType) / sizeof(uint8_t);
- assert(!(reinterpret_cast<uintptr_t>(Begin) % 64));
- for (auto P = Begin; P < End; P += Step)
+ const size_t StepMask = Step - 1;
+ auto P = Begin;
+ // Iterate by 1 byte until either the alignment boundary or the end.
+ for (; reinterpret_cast<uintptr_t>(P) & StepMask && P < End; P++)
+ if (uint8_t V = *P)
+ Handle8bitCounter(FirstFeature + P - Begin, V);
+
+ // Iterate by Step bytes at a time.
+ for (; P < End; P += Step)
if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P))
for (size_t I = 0; I < Step; I++, Bundle >>= 8)
if (uint8_t V = Bundle & 0xff)
Handle8bitCounter(FirstFeature + P - Begin + I, V);
+
+ // Iterate by 1 byte until the end.
+ for (; P < End; P++)
+ if (uint8_t V = *P)
+ Handle8bitCounter(FirstFeature + P - Begin, V);
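+
+  // Worked example (illustrative): with Begin == 0x1003 and an 8-byte
+  // LargeType, the first loop visits 0x1003..0x1007 one byte at a time, the
+  // second scans aligned 8-byte bundles from 0x1008, and the third mops up
+  // the unaligned tail before End. Feature indices are always computed from
+  // P - Begin, so the three-way split reports the same features as a plain
+  // byte loop.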
}
template <class Callback> // bool Callback(size_t Feature)
@@ -145,8 +162,16 @@ void TracePC::CollectFeatures(Callback HandleFeature) const {
HandleFeature(Idx * 8 + Bit);
};
- ForEachNonZeroByte(Counters, Counters + N, 0, Handle8bitCounter);
- ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), N * 8,
+ size_t FirstFeature = 0;
+ ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter);
+ FirstFeature += N * 8;
+ for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) {
+ ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop,
+ FirstFeature, Handle8bitCounter);
+ FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start);
+ }
+
+ ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature,
Handle8bitCounter);
if (UseValueProfile)
diff --git a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
index dfb6007b7970..b5a61ddca715 100644
--- a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
+++ b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
@@ -9,7 +9,7 @@
#include <cstring>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
- if (Size < 16) return 0;
+ if (Size < 16 || Size > 64) return 0;
int64_t x;
uint64_t y;
memcpy(&x, Data, sizeof(x));
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index b39938a705f6..1cf6c9502a2b 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -206,6 +206,9 @@ include_directories(..)
add_subdirectory(no-coverage)
add_subdirectory(trace-pc)
add_subdirectory(ubsan)
+if (NOT MSVC)
+ add_subdirectory(inline-8bit-counters)
+endif()
add_library(LLVMFuzzer-DSO1 SHARED DSO1.cpp)
add_library(LLVMFuzzer-DSO2 SHARED DSO2.cpp)
diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp
index bbf5ea235c7a..ba963d9b1de8 100644
--- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp
+++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp
@@ -8,6 +8,7 @@
#include <iostream>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size > 64) return 0;
int bits = 0;
if (Size > 0 && Data[0] == 'F') bits |= 1;
if (Size > 1 && Data[1] == 'U') bits |= 2;
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index c8beb4331bfa..812894fd947f 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -772,4 +772,16 @@ TEST(Fuzzer, ForEachNonZeroByte) {
Expected = {{108, 1}, {109, 2}, {118, 3}, {120, 4},
{135, 5}, {137, 6}, {146, 7}, {163, 8}};
EXPECT_EQ(Res, Expected);
+
+ Res.clear();
+ ForEachNonZeroByte(Ar + 9, Ar + N, 109, CB);
+ Expected = { {109, 2}, {118, 3}, {120, 4},
+ {135, 5}, {137, 6}, {146, 7}, {163, 8}};
+ EXPECT_EQ(Res, Expected);
+
+ Res.clear();
+ ForEachNonZeroByte(Ar + 9, Ar + N - 9, 109, CB);
+ Expected = { {109, 2}, {118, 3}, {120, 4},
+ {135, 5}, {137, 6}, {146, 7}};
+ EXPECT_EQ(Res, Expected);
}
diff --git a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp
index d09542963626..37eeede7cbff 100644
--- a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp
+++ b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp
@@ -11,6 +11,7 @@
static volatile int Sink;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size > 64) return 0;
int8_t Ids[256];
memset(Ids, -1, sizeof(Ids));
for (size_t i = 0; i < Size; i++)
diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp
index 99e96cb25dcd..a3f4211ebeef 100644
--- a/lib/Fuzzer/test/SimpleHashTest.cpp
+++ b/lib/Fuzzer/test/SimpleHashTest.cpp
@@ -26,7 +26,7 @@ static uint32_t simple_hash(const uint8_t *Data, size_t Size) {
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
- if (Size < 14)
+ if (Size < 14 || Size > 64)
return 0;
uint32_t Hash = simple_hash(&Data[0], Size - 4);
diff --git a/lib/Fuzzer/test/SingleStrncmpTest.cpp b/lib/Fuzzer/test/SingleStrncmpTest.cpp
index b302670fb743..b38c7995d8ff 100644
--- a/lib/Fuzzer/test/SingleStrncmpTest.cpp
+++ b/lib/Fuzzer/test/SingleStrncmpTest.cpp
@@ -8,6 +8,7 @@
#include <cstring>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size > 64) return 0;
char *S = (char*)Data;
volatile auto Strncmp = &(strncmp); // Make sure strncmp is not inlined.
if (Size >= 6 && !Strncmp(S, "qwerty", 6)) {
diff --git a/lib/Fuzzer/test/TableLookupTest.cpp b/lib/Fuzzer/test/TableLookupTest.cpp
index 8126eeabaf42..4d8ab0611cde 100644
--- a/lib/Fuzzer/test/TableLookupTest.cpp
+++ b/lib/Fuzzer/test/TableLookupTest.cpp
@@ -15,7 +15,6 @@ const size_t N = 1 << 12;
// Define an array of counters that will be understood by libFuzzer
// as extra coverage signal. The array must be:
// * uint8_t
-// * aligned by 64
// * in the section named __libfuzzer_extra_counters.
// The target code may declare more than one such array.
//
@@ -23,7 +22,7 @@ const size_t N = 1 << 12;
// depending on whether multiple occurrences of the event 'Idx'
// is important to distinguish from one occurrence.
#ifdef __linux__
-alignas(64) __attribute__((section("__libfuzzer_extra_counters")))
+__attribute__((section("__libfuzzer_extra_counters")))
#endif
static uint8_t Counters[N];
diff --git a/lib/Fuzzer/test/fuzzer-dirs.test b/lib/Fuzzer/test/fuzzer-dirs.test
index 3de64f278f5d..622ff5da3a29 100644
--- a/lib/Fuzzer/test/fuzzer-dirs.test
+++ b/lib/Fuzzer/test/fuzzer-dirs.test
@@ -5,9 +5,13 @@ RUN: echo b > %t/SUB1/SUB2/b
RUN: echo c > %t/SUB1/SUB2/SUB3/c
RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
SUBDIRS: READ units: 3
-RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long
+RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
+RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
+RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024
+RUN: cat %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 > %t/SUB1/f4096
+RUN: cat %t/SUB1/f4096 %t/SUB1/f4096 > %t/SUB1/f8192
RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG
-LONG: INFO: -max_len is not provided, using 93
+LONG: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 8192 bytes
RUN: rm -rf %t/SUB1
RUN: not LLVMFuzzer-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR
diff --git a/lib/Fuzzer/test/inline-8bit-counters.test b/lib/Fuzzer/test/inline-8bit-counters.test
new file mode 100644
index 000000000000..8747af81451f
--- /dev/null
+++ b/lib/Fuzzer/test/inline-8bit-counters.test
@@ -0,0 +1,4 @@
+REQUIRES: linux
+CHECK: INFO: Loaded 1 modules with {{.*}} inline 8-bit counters
+CHECK: BINGO
+RUN: LLVMFuzzer-SimpleTest-Inline8bitCounters -runs=1000000 -seed=1 2>&1 | FileCheck %s
diff --git a/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt b/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt
new file mode 100644
index 000000000000..088ab04fe6a0
--- /dev/null
+++ b/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt
@@ -0,0 +1,12 @@
+# These tests are instrumented with -fsanitize-coverage=inline-8bit-counters
+
+set(CMAKE_CXX_FLAGS
+ "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard -fsanitize-coverage=inline-8bit-counters")
+
+set(Inline8bitCounterTests
+ SimpleTest
+ )
+
+foreach(Test ${Inline8bitCounterTests})
+ add_libfuzzer_test(${Test}-Inline8bitCounters SOURCES ../${Test}.cpp)
+endforeach()
diff --git a/lib/Fuzzer/test/trace-pc/CMakeLists.txt b/lib/Fuzzer/test/trace-pc/CMakeLists.txt
index e800f82cc5dc..572fcc983654 100644
--- a/lib/Fuzzer/test/trace-pc/CMakeLists.txt
+++ b/lib/Fuzzer/test/trace-pc/CMakeLists.txt
@@ -1,5 +1,4 @@
-# These tests are not instrumented with coverage and don't
-# have coverage rt in the binary.
+# These tests are instrumented with -fsanitize-coverage=trace-pc
set(CMAKE_CXX_FLAGS
"${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard -fsanitize-coverage=trace-pc")
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index a20f3f811c8d..3469026ad7ed 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -348,8 +348,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
/// factors factored out. If Folded is false, return null if no factoring was
/// possible, to avoid endlessly bouncing an unfoldable expression back into the
/// top-level folder.
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
- bool Folded) {
+static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) {
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
@@ -404,8 +403,7 @@ static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
/// factors factored out. If Folded is false, return null if no factoring was
/// possible, to avoid endlessly bouncing an unfoldable expression back into the
/// top-level folder.
-static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
- bool Folded) {
+static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) {
// The alignment of an array is equal to the alignment of the
// array element. Note that this is not always true for vectors.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
@@ -469,8 +467,7 @@ static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
/// any known factors factored out. If Folded is false, return null if no
/// factoring was possible, to avoid endlessly bouncing an unfoldable expression
/// back into the top-level folder.
-static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
- Type *DestTy,
+static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy,
bool Folded) {
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index 6c189cf656de..6585304e7674 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -55,8 +55,6 @@ public:
return User::operator new(s, 1);
}
- void *operator new(size_t, unsigned) = delete;
-
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -77,8 +75,6 @@ public:
return User::operator new(s, 2);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -99,8 +95,6 @@ public:
return User::operator new(s, 3);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -122,8 +116,6 @@ public:
return User::operator new(s, 2);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -146,8 +138,6 @@ public:
return User::operator new(s, 3);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -173,8 +163,6 @@ public:
return User::operator new(s, 3);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -196,8 +184,6 @@ public:
return User::operator new(s, 1);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Indices - These identify which value to extract.
const SmallVector<unsigned, 4> Indices;
@@ -230,8 +216,6 @@ public:
return User::operator new(s, 2);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Indices - These identify the position for the insertion.
const SmallVector<unsigned, 4> Indices;
@@ -297,8 +281,6 @@ public:
return User::operator new(s, 2);
}
- void *operator new(size_t, unsigned) = delete;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index e6c49cad0722..0bf68b4c53bb 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -598,8 +598,7 @@ unsigned DIExpression::ExprOperand::getSize() const {
case dwarf::DW_OP_LLVM_fragment:
return 3;
case dwarf::DW_OP_constu:
- case dwarf::DW_OP_plus:
- case dwarf::DW_OP_minus:
+ case dwarf::DW_OP_plus_uconst:
return 2;
default:
return 1;
@@ -641,6 +640,7 @@ bool DIExpression::isValid() const {
break;
}
case dwarf::DW_OP_constu:
+ case dwarf::DW_OP_plus_uconst:
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
case dwarf::DW_OP_deref:
@@ -664,11 +664,12 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) {
void DIExpression::appendOffset(SmallVectorImpl<uint64_t> &Ops,
int64_t Offset) {
if (Offset > 0) {
- Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Offset);
} else if (Offset < 0) {
- Ops.push_back(dwarf::DW_OP_minus);
+ Ops.push_back(dwarf::DW_OP_constu);
Ops.push_back(-Offset);
+ Ops.push_back(dwarf::DW_OP_minus);
}
}
@@ -677,16 +678,23 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
Offset = 0;
return true;
}
- if (getNumElements() != 2)
- return false;
- if (Elements[0] == dwarf::DW_OP_plus) {
+
+ if (getNumElements() == 2 && Elements[0] == dwarf::DW_OP_plus_uconst) {
Offset = Elements[1];
return true;
}
- if (Elements[0] == dwarf::DW_OP_minus) {
- Offset = -Elements[1];
- return true;
+
+ if (getNumElements() == 3 && Elements[0] == dwarf::DW_OP_constu) {
+ if (Elements[2] == dwarf::DW_OP_plus) {
+ Offset = Elements[1];
+ return true;
+ }
+ if (Elements[2] == dwarf::DW_OP_minus) {
+ Offset = -Elements[1];
+ return true;
+ }
}
+
return false;
}
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 81b02946e1d5..b7fa07c6ffac 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -134,18 +134,17 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
return CI;
}
-CallInst *IRBuilderBase::CreateElementAtomicMemCpy(
- Value *Dst, Value *Src, Value *NumElements, uint32_t ElementSize,
- MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag,
- MDNode *NoAliasTag) {
+CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
+ Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, MDNode *TBAATag,
+ MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
- Value *Ops[] = {Dst, Src, NumElements, getInt32(ElementSize)};
- Type *Tys[] = {Dst->getType(), Src->getType()};
+ Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
+ Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
- Value *TheFn =
- Intrinsic::getDeclaration(M, Intrinsic::memcpy_element_atomic, Tys);
+ Value *TheFn = Intrinsic::getDeclaration(
+ M, Intrinsic::memcpy_element_unordered_atomic, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 0b1bc9a8c270..92e5798dcf21 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -1470,7 +1470,7 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) {
if (E)
OrigElements = E->getElements();
std::vector<uint64_t> Elements(OrigElements.size() + 2);
- Elements[0] = dwarf::DW_OP_plus;
+ Elements[0] = dwarf::DW_OP_plus_uconst;
Elements[1] = Offset;
std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2);
E = DIExpression::get(getContext(), Elements);
diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp
index 9dd712f9ca13..51c4bae3332e 100644
--- a/lib/IR/ModuleSummaryIndex.cpp
+++ b/lib/IR/ModuleSummaryIndex.cpp
@@ -56,3 +56,16 @@ ModuleSummaryIndex::getGlobalValueSummary(uint64_t ValueGUID,
auto &Summary = VI.getSummaryList()[0];
return Summary.get();
}
+
+bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
+ auto VI = getValueInfo(GUID);
+ if (!VI)
+ return true;
+ const auto &SummaryList = VI.getSummaryList();
+ if (SummaryList.empty())
+ return true;
+ for (auto &I : SummaryList)
+ if (isGlobalValueLive(I.get()))
+ return true;
+ return false;
+}
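+
+// Note on the conservative defaults above: a GUID with no value info or an
+// empty summary list cannot be analyzed, so it is treated as live; a GUID is
+// only reported dead when every one of its summaries is known dead.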
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 5c1b3412840d..819f63520c74 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1330,6 +1330,14 @@ Verifier::visitModuleFlag(const MDNode *Op,
= mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
Assert(Value, "wchar_size metadata requires constant integer argument");
}
+
+ if (ID->getString() == "Linker Options") {
+ // If the llvm.linker.options named metadata exists, we assume that the
+ // bitcode reader has upgraded the module flag. Otherwise the flag might
+ // have been created by a client directly.
+ Assert(M.getNamedMetadata("llvm.linker.options"),
+ "'Linker Options' named metadata no longer supported");
+ }
}
/// Return true if this attribute kind only applies to functions.
@@ -4004,10 +4012,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
CS);
break;
}
- case Intrinsic::memcpy_element_atomic: {
- ConstantInt *ElementSizeCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
- Assert(ElementSizeCI, "element size of the element-wise atomic memory "
- "intrinsic must be a constant int",
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const ElementUnorderedAtomicMemCpyInst *MI =
+ cast<ElementUnorderedAtomicMemCpyInst>(CS.getInstruction());
+
+ ConstantInt *ElementSizeCI =
+ dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+ Assert(ElementSizeCI,
+ "element size of the element-wise unordered atomic memory "
+ "intrinsic must be a constant int",
CS);
const APInt &ElementSizeVal = ElementSizeCI->getValue();
Assert(ElementSizeVal.isPowerOf2(),
@@ -4015,19 +4029,24 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"must be a power of 2",
CS);
+ if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
+ uint64_t Length = LengthCI->getZExtValue();
+ uint64_t ElementSize = MI->getElementSizeInBytes();
+ Assert((Length % ElementSize) == 0,
+ "constant length must be a multiple of the element size in the "
+ "element-wise atomic memory intrinsic",
+ CS);
+ }
+
auto IsValidAlignment = [&](uint64_t Alignment) {
return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
};
-
uint64_t DstAlignment = CS.getParamAlignment(0),
SrcAlignment = CS.getParamAlignment(1);
-
Assert(IsValidAlignment(DstAlignment),
- "incorrect alignment of the destination argument",
- CS);
+ "incorrect alignment of the destination argument", CS);
Assert(IsValidAlignment(SrcAlignment),
- "incorrect alignment of the source argument",
- CS);
+ "incorrect alignment of the source argument", CS);
break;
}
case Intrinsic::gcroot:
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index 9e586465025e..1d22c2a11f13 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -39,6 +39,7 @@ subdirectories =
Support
TableGen
Target
+ Testing
ToolDrivers
Transforms
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 9d2a44045d6a..35032fdd33e1 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -364,31 +364,40 @@ LTO::LTO(Config Conf, ThinBackend Backend,
// Requires a destructor for MapVector<BitcodeModule>.
LTO::~LTO() = default;
-// Add the given symbol to the GlobalResolutions map, and resolve its partition.
-void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym,
- SymbolResolution Res, unsigned Partition) {
- auto &GlobalRes = GlobalResolutions[Sym.getName()];
- GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
- if (Res.Prevailing)
- GlobalRes.IRName = Sym.getIRName();
-
- // Set the partition to external if we know it is re-defined by the linker
- // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
-  // regular object, is referenced from llvm.compiler.used, or was already
- // recorded as being referenced from a different partition.
- if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
- (GlobalRes.Partition != GlobalResolution::Unknown &&
- GlobalRes.Partition != Partition)) {
- GlobalRes.Partition = GlobalResolution::External;
- } else
- // First recorded reference, save the current partition.
- GlobalRes.Partition = Partition;
-
- // Flag as visible outside of ThinLTO if visible from a regular object or
- // if this is a reference in the regular LTO partition.
- GlobalRes.VisibleOutsideThinLTO |=
- (Res.VisibleToRegularObj || Sym.isUsed() ||
- Partition == GlobalResolution::RegularLTO);
+// Add the symbols in the given module to the GlobalResolutions map, and
+// resolve their partitions.
+void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
+ ArrayRef<SymbolResolution> Res,
+ unsigned Partition, bool InSummary) {
+ auto *ResI = Res.begin();
+ auto *ResE = Res.end();
+ (void)ResE;
+ for (const InputFile::Symbol &Sym : Syms) {
+ assert(ResI != ResE);
+ SymbolResolution Res = *ResI++;
+
+ auto &GlobalRes = GlobalResolutions[Sym.getName()];
+ GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
+ if (Res.Prevailing)
+ GlobalRes.IRName = Sym.getIRName();
+
+ // Set the partition to external if we know it is re-defined by the linker
+ // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
+    // regular object, is referenced from llvm.compiler.used, or was already
+ // recorded as being referenced from a different partition.
+ if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
+ (GlobalRes.Partition != GlobalResolution::Unknown &&
+ GlobalRes.Partition != Partition)) {
+ GlobalRes.Partition = GlobalResolution::External;
+ } else
+ // First recorded reference, save the current partition.
+ GlobalRes.Partition = Partition;
+
+ // Flag as visible outside of summary if visible from a regular object or
+ // from a module that does not have a summary.
+ GlobalRes.VisibleOutsideSummary |=
+ (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary);
+ }
}
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
@@ -434,46 +443,61 @@ Error LTO::add(std::unique_ptr<InputFile> Input,
Error LTO::addModule(InputFile &Input, unsigned ModI,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
- Expected<bool> HasThinLTOSummary = Input.Mods[ModI].hasSummary();
- if (!HasThinLTOSummary)
- return HasThinLTOSummary.takeError();
+ Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
+ if (!LTOInfo)
+ return LTOInfo.takeError();
+ BitcodeModule BM = Input.Mods[ModI];
auto ModSyms = Input.module_symbols(ModI);
- if (*HasThinLTOSummary)
- return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
- else
- return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
+ addModuleToGlobalRes(ModSyms, {ResI, ResE},
+ LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
+ LTOInfo->HasSummary);
+
+ if (LTOInfo->IsThinLTO)
+ return addThinLTO(BM, ModSyms, ResI, ResE);
+
+ Expected<RegularLTOState::AddedModule> ModOrErr =
+ addRegularLTO(BM, ModSyms, ResI, ResE);
+ if (!ModOrErr)
+ return ModOrErr.takeError();
+
+ if (!LTOInfo->HasSummary)
+ return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false);
+
+ // Regular LTO module summaries are added to a dummy module that represents
+ // the combined regular LTO module.
+ if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull))
+ return Err;
+ RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr));
+ return Error::success();
}
// Add a regular LTO object to the link.
-Error LTO::addRegularLTO(BitcodeModule BM,
- ArrayRef<InputFile::Symbol> Syms,
- const SymbolResolution *&ResI,
- const SymbolResolution *ResE) {
- if (!RegularLTO.CombinedModule) {
- RegularLTO.CombinedModule =
- llvm::make_unique<Module>("ld-temp.o", RegularLTO.Ctx);
- RegularLTO.Mover = llvm::make_unique<IRMover>(*RegularLTO.CombinedModule);
- }
+// The resulting module needs to be linked into the combined LTO module with
+// linkRegularLTO.
+Expected<LTO::RegularLTOState::AddedModule>
+LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
+ const SymbolResolution *&ResI,
+ const SymbolResolution *ResE) {
+ RegularLTOState::AddedModule Mod;
Expected<std::unique_ptr<Module>> MOrErr =
BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
/*IsImporting*/ false);
if (!MOrErr)
return MOrErr.takeError();
-
Module &M = **MOrErr;
+ Mod.M = std::move(*MOrErr);
+
if (Error Err = M.materializeMetadata())
- return Err;
+ return std::move(Err);
UpgradeDebugInfo(M);
ModuleSymbolTable SymTab;
SymTab.addModule(&M);
- std::vector<GlobalValue *> Keep;
-
for (GlobalVariable &GV : M.globals())
if (GV.hasAppendingLinkage())
- Keep.push_back(&GV);
+ Mod.Keep.push_back(&GV);
DenseSet<GlobalObject *> AliasedGlobals;
for (auto &GA : M.aliases())
@@ -502,7 +526,6 @@ Error LTO::addRegularLTO(BitcodeModule BM,
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
- addSymbolToGlobalRes(Sym, Res, 0);
assert(MsymI != MsymE);
ModuleSymbolTable::Symbol Msym = *MsymI++;
@@ -512,7 +535,7 @@ Error LTO::addRegularLTO(BitcodeModule BM,
if (Res.Prevailing) {
if (Sym.isUndefined())
continue;
- Keep.push_back(GV);
+ Mod.Keep.push_back(GV);
// For symbols re-defined with linker -wrap and -defsym options,
// set the linkage to weak to inhibit IPO. The linkage will be
// restored by the linker.
@@ -527,17 +550,14 @@ Error LTO::addRegularLTO(BitcodeModule BM,
(GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
GV->hasAvailableExternallyLinkage()) &&
!AliasedGlobals.count(cast<GlobalObject>(GV))) {
- // Either of the above three types of linkage indicates that the
+ // Any of the above three types of linkage indicates that the
// chosen prevailing symbol will have the same semantics as this copy of
- // the symbol, so we can link it with available_externally linkage. We
- // only need to do this if the symbol is undefined.
- GlobalValue *CombinedGV =
- RegularLTO.CombinedModule->getNamedValue(GV->getName());
- if (!CombinedGV || CombinedGV->isDeclaration()) {
- Keep.push_back(GV);
- GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
- cast<GlobalObject>(GV)->setComdat(nullptr);
- }
+ // the symbol, so we may be able to link it with available_externally
+ // linkage. We will decide later whether to do that when we link this
+ // module (in linkRegularLTO), based on whether it is undefined.
+ Mod.Keep.push_back(GV);
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ cast<GlobalObject>(GV)->setComdat(nullptr);
}
}
// Common resolution: collect the maximum size/alignment over all commons.
@@ -555,25 +575,54 @@ Error LTO::addRegularLTO(BitcodeModule BM,
// FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
}
assert(MsymI == MsymE);
+ return std::move(Mod);
+}
+
+Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
+ bool LivenessFromIndex) {
+ if (!RegularLTO.CombinedModule) {
+ RegularLTO.CombinedModule =
+ llvm::make_unique<Module>("ld-temp.o", RegularLTO.Ctx);
+ RegularLTO.Mover = llvm::make_unique<IRMover>(*RegularLTO.CombinedModule);
+ }
+
+ std::vector<GlobalValue *> Keep;
+ for (GlobalValue *GV : Mod.Keep) {
+ if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID()))
+ continue;
- return RegularLTO.Mover->move(std::move(*MOrErr), Keep,
+ if (!GV->hasAvailableExternallyLinkage()) {
+ Keep.push_back(GV);
+ continue;
+ }
+
+ // Only link available_externally definitions if we don't already have a
+ // definition.
+ GlobalValue *CombinedGV =
+ RegularLTO.CombinedModule->getNamedValue(GV->getName());
+ if (CombinedGV && !CombinedGV->isDeclaration())
+ continue;
+
+ Keep.push_back(GV);
+ }
+
+ return RegularLTO.Mover->move(std::move(Mod.M), Keep,
[](GlobalValue &, IRMover::ValueAdder) {},
/* IsPerformingImport */ false);
}
-// Add a ThinLTO object to the link.
-Error LTO::addThinLTO(BitcodeModule BM,
- ArrayRef<InputFile::Symbol> Syms,
+// Add a ThinLTO module to the link.
+Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
if (Error Err =
- BM.readSummary(ThinLTO.CombinedIndex, ThinLTO.ModuleMap.size()))
+ BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
+ ThinLTO.ModuleMap.size()))
return Err;
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
- addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1);
if (Res.Prevailing) {
if (!Sym.getIRName().empty()) {
@@ -601,7 +650,7 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
for (auto &Res : GlobalResolutions) {
- if (Res.second.VisibleOutsideThinLTO &&
+ if (Res.second.VisibleOutsideSummary &&
// IRName will be defined if we have seen the prevailing copy of
// this value. If not, no need to preserve any ThinLTO copies.
!Res.second.IRName.empty())
@@ -614,7 +663,8 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
// Save the status of having a regularLTO combined module, as
// this is needed for generating the ThinLTO Task ID, and
// the CombinedModule will be moved at the end of runRegularLTO.
- bool HasRegularLTO = RegularLTO.CombinedModule != nullptr;
+ bool HasRegularLTO = RegularLTO.CombinedModule != nullptr ||
+ !RegularLTO.ModsWithSummaries.empty();
// Invoke regular LTO if there was a regular LTO module to start with.
if (HasRegularLTO)
if (auto E = runRegularLTO(AddStream))
@@ -623,6 +673,11 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
}
Error LTO::runRegularLTO(AddStreamFn AddStream) {
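+ // Link the deferred modules with summaries now that index-based liveness has
+ // been computed, so their dead symbols can be dropped.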
+ for (auto &M : RegularLTO.ModsWithSummaries)
+ if (Error Err = linkRegularLTO(std::move(M),
+ /*LivenessFromIndex=*/true))
+ return Err;
+
// Make sure commons have the right size/alignment: we kept the largest from
// all the prevailing when adding the inputs, and we apply it here.
const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
@@ -920,17 +975,6 @@ ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix,
};
}
-static bool IsLiveByGUID(const ModuleSummaryIndex &Index,
- GlobalValue::GUID GUID) {
- auto VI = Index.getValueInfo(GUID);
- if (!VI)
- return false;
- for (auto &I : VI.getSummaryList())
- if (Index.isGlobalValueLive(I.get()))
- return true;
- return false;
-}
-
Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
bool HasRegularLTO) {
if (ThinLTO.ModuleMap.empty())
@@ -979,7 +1023,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
auto GUID = GlobalValue::getGUID(
GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
// Mark exported unless index-based analysis determined it to be dead.
- if (IsLiveByGUID(ThinLTO.CombinedIndex, GUID))
+ if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
ExportedGUIDs.insert(GUID);
}
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 11f0982c6a60..3cc8b7d0e770 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -77,14 +77,12 @@ bool LTOModule::isBitcodeFile(StringRef Path) {
}
bool LTOModule::isThinLTO() {
- // Right now the detection is only based on the summary presence. We may want
- // to add a dedicated flag at some point.
- Expected<bool> Result = hasGlobalValueSummary(MBRef);
+ Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef);
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(), "");
return false;
}
- return *Result;
+ return Result->IsThinLTO;
}
bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
@@ -637,10 +635,10 @@ void LTOModule::parseMetadata() {
raw_string_ostream OS(LinkerOpts);
// Linker Options
- if (Metadata *Val = getModule().getModuleFlag("Linker Options")) {
- MDNode *LinkerOptions = cast<MDNode>(Val);
+ if (NamedMDNode *LinkerOptions =
+ getModule().getNamedMetadata("llvm.linker.options")) {
for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
- MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ MDNode *MDOptions = LinkerOptions->getOperand(i);
for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
OS << " " << MDOption->getString();
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index f1dfb91aafbb..a407691b0bd1 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -603,6 +603,8 @@ EndStmt:
Type = ELF::SHT_NOTE;
else if (TypeName == "unwind")
Type = ELF::SHT_X86_64_UNWIND;
+ else if (TypeName == "llvm_odrtab")
+ Type = ELF::SHT_LLVM_ODRTAB;
else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index a75068ebf05a..2f4f61aa4d50 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -147,6 +147,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
// Print hex value of the flag while we do not have
// any standard symbolic representation of the flag.
OS << "0x7000001e";
+ else if (Type == ELF::SHT_LLVM_ODRTAB)
+ OS << "llvm_odrtab";
else
report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
" for section " + getSectionName());
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 4b3dc6e0c211..db304c027f99 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -181,7 +181,10 @@ class WasmObjectWriter : public MCObjectWriter {
// Index values to use for fixing up call_indirect type indices.
// Maps function symbols to the index of the type of the function
DenseMap<const MCSymbolWasm *, uint32_t> TypeIndices;
-
+ // Maps function symbols to the table element index space. Used
+ // for TABLE_INDEX relocation types (i.e. address-taken functions).
+ DenseMap<const MCSymbolWasm *, uint32_t> IndirectSymbolIndices;
+ // Maps function/global symbols to the function/global index space.
DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices;
DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
@@ -189,9 +192,8 @@ class WasmObjectWriter : public MCObjectWriter {
// TargetObjectWriter wrappers.
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const {
- return TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel);
+ unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const {
+ return TargetObjectWriter->getRelocType(Target, Fixup);
}
void startSection(SectionBookkeeping &Section, unsigned SectionId,
@@ -210,6 +212,7 @@ private:
DataRelocations.clear();
TypeIndices.clear();
SymbolIndices.clear();
+ IndirectSymbolIndices.clear();
FunctionTypeIndices.clear();
MCObjectWriter::reset();
}
@@ -233,7 +236,7 @@ private:
void writeTypeSection(const SmallVector<WasmFunctionType, 4> &FunctionTypes);
void writeImportSection(const SmallVector<WasmImport, 4> &Imports);
void writeFunctionSection(const SmallVector<WasmFunction, 4> &Functions);
- void writeTableSection(const SmallVector<uint32_t, 4> &TableElems);
+ void writeTableSection(uint32_t NumElements);
void writeMemorySection(const SmallVector<char, 0> &DataBytes);
void writeGlobalSection(const SmallVector<WasmGlobal, 4> &Globals);
void writeExportSection(const SmallVector<WasmExport, 4> &Exports);
@@ -402,7 +405,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
SymA->setUsedInReloc();
}
- unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel);
+ assert(!IsPCRel);
+ unsigned Type = getRelocType(Target, Fixup);
+
WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
if (FixupSection.hasInstructions())
@@ -464,9 +469,11 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) {
uint32_t WasmObjectWriter::getRelocationIndexValue(
const WasmRelocationEntry &RelEntry) {
switch (RelEntry.Type) {
- case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ assert(IndirectSymbolIndices.count(RelEntry.Symbol));
+ return IndirectSymbolIndices[RelEntry.Symbol];
+ case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
@@ -617,21 +624,19 @@ void WasmObjectWriter::writeFunctionSection(
endSection(Section);
}
-void WasmObjectWriter::writeTableSection(
- const SmallVector<uint32_t, 4> &TableElems) {
+void WasmObjectWriter::writeTableSection(uint32_t NumElements) {
// For now, always emit the table section, since indirect calls are not
// valid without it. In the future, we could perhaps be more clever and omit
// it if there are no indirect calls.
+
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_TABLE);
- // The number of tables, fixed to 1 for now.
- encodeULEB128(1, getStream());
-
- encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream());
-
- encodeULEB128(0, getStream()); // flags
- encodeULEB128(TableElems.size(), getStream()); // initial
+ encodeULEB128(1, getStream()); // The number of tables, fixed to 1 for now.
+ encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); // Type of table
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(NumElements, getStream()); // initial
endSection(Section);
}
@@ -1072,8 +1077,10 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
}
// If needed, prepare the function to be called indirectly.
- if (IsAddressTaken.count(&WS))
+ if (IsAddressTaken.count(&WS)) {
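+ // Remember which table slot this function occupies so that TABLE_INDEX
+ // relocations can be resolved against it later.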
+ IndirectSymbolIndices[&WS] = TableElems.size();
TableElems.push_back(Index);
+ }
} else {
if (WS.isTemporary() && !WS.getSize())
continue;
@@ -1180,7 +1187,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
writeTypeSection(FunctionTypes);
writeImportSection(Imports);
writeFunctionSection(Functions);
- writeTableSection(TableElems);
+ writeTableSection(TableElems.size());
writeMemorySection(DataBytes);
writeGlobalSection(Globals);
writeExportSection(Exports);
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index e1c35ed6a6a0..4034f9039dda 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -36,7 +36,8 @@
using namespace llvm;
NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
- : Buf(MemoryBuffer::getMemBuffer(BufRef, false)) {}
+ : Buf(MemoryBuffer::getMemBuffer(BufRef, false)),
+ MemberName(BufRef.getBufferIdentifier()) {}
Expected<NewArchiveMember>
NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
@@ -48,6 +49,7 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
NewArchiveMember M;
assert(M.IsNew == false);
M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false);
+ M.MemberName = M.Buf->getBufferIdentifier();
if (!Deterministic) {
auto ModTimeOrErr = OldMember.getLastModified();
if (!ModTimeOrErr)
@@ -97,6 +99,7 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
NewArchiveMember M;
M.IsNew = true;
M.Buf = std::move(*MemberBufferOrErr);
+ M.MemberName = M.Buf->getBufferIdentifier();
if (!Deterministic) {
M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>(
Status.getLastModificationTime());
@@ -185,7 +188,7 @@ printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name,
}
static bool useStringTable(bool Thin, StringRef Name) {
- return Thin || Name.size() >= 16;
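+ // A '/' terminates the inline name field of an archive member header, so any
+ // name containing one must be stored in the string table instead.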
+ return Thin || Name.size() >= 16 || Name.contains('/');
}
static void
@@ -239,7 +242,7 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName,
unsigned StartOffset = 0;
for (const NewArchiveMember &M : Members) {
StringRef Path = M.Buf->getBufferIdentifier();
- StringRef Name = sys::path::filename(Path);
+ StringRef Name = M.MemberName;
if (!useStringTable(Thin, Name))
continue;
if (StartOffset == 0) {
@@ -423,9 +426,8 @@ llvm::writeArchive(StringRef ArcName,
if (Kind == object::Archive::K_DARWIN)
Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8);
- printMemberHeader(Out, Kind, Thin,
- sys::path::filename(M.Buf->getBufferIdentifier()),
- StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms,
+ printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter,
+ M.ModTime, M.UID, M.GID, M.Perms,
M.Buf->getBufferSize() + Padding);
if (!Thin)
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index 9bc28dc14a29..448fb1bd6b56 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -192,6 +192,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY);
STRINGIFY_ENUM_CASE(ELF, SHT_GROUP);
STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index d21acdb1d556..a6cd5dda12d3 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -109,9 +109,9 @@ Error Builder::addModule(Module *M) {
if (TT.isOSBinFormatCOFF()) {
if (auto E = M->materializeMetadata())
return E;
- if (Metadata *Val = M->getModuleFlag("Linker Options")) {
- MDNode *LinkerOptions = cast<MDNode>(Val);
- for (const MDOperand &MDOptions : LinkerOptions->operands())
+ if (NamedMDNode *LinkerOptions =
+ M->getNamedMetadata("llvm.linker.options")) {
+ for (MDNode *MDOptions : LinkerOptions->operands())
for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();
}
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 041659e7aa23..3f6080d48f9d 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -30,6 +30,10 @@ namespace object {
const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t);
+// COFF files seem to be inconsistent about alignment between sections, so just
+// use 8-byte alignment because it makes everyone happy.
+const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t);
+
static const size_t ResourceMagicSize = 16;
static const size_t NullEntrySize = 16;
@@ -66,7 +70,7 @@ ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref,
const WindowsResource *Owner, Error &Err)
: Reader(Ref), OwningRes(Owner) {
if (loadNext())
- Err = make_error<GenericBinaryError>("Could not read first entry.",
+ Err = make_error<GenericBinaryError>("Could not read first entry.\n",
object_error::unexpected_eof);
}
@@ -133,31 +137,35 @@ Error WindowsResourceParser::parse(WindowsResource *WR) {
ResourceEntryRef Entry = EntryOrErr.get();
bool End = false;
while (!End) {
-
Data.push_back(Entry.getData());
- if (Entry.checkTypeString())
+ bool IsNewTypeString = false;
+ bool IsNewNameString = false;
+
+ Root.addEntry(Entry, IsNewTypeString, IsNewNameString);
+
+ if (IsNewTypeString)
StringTable.push_back(Entry.getTypeString());
- if (Entry.checkNameString())
+ if (IsNewNameString)
StringTable.push_back(Entry.getNameString());
- Root.addEntry(Entry);
-
RETURN_IF_ERROR(Entry.moveNext(End));
}
return Error::success();
}
-void WindowsResourceParser::printTree() const {
- ScopedPrinter Writer(outs());
+void WindowsResourceParser::printTree(raw_ostream &OS) const {
+ ScopedPrinter Writer(OS);
Root.print(Writer, "Resource Tree");
}
-void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry) {
- TreeNode &TypeNode = addTypeNode(Entry);
- TreeNode &NameNode = TypeNode.addNameNode(Entry);
+void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
+ bool &IsNewTypeString,
+ bool &IsNewNameString) {
+ TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString);
+ TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString);
NameNode.addLanguageNode(Entry);
}
@@ -171,7 +179,6 @@ WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion,
uint32_t Characteristics)
: IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion),
Characteristics(Characteristics) {
- if (IsDataNode)
DataIndex = DataCount++;
}
@@ -194,17 +201,19 @@ WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion,
}
WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry) {
+WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry,
+ bool &IsNewTypeString) {
if (Entry.checkTypeString())
- return addChild(Entry.getTypeString());
+ return addChild(Entry.getTypeString(), IsNewTypeString);
else
return addChild(Entry.getTypeID());
}
WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry) {
+WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry,
+ bool &IsNewNameString) {
if (Entry.checkNameString())
- return addChild(Entry.getNameString());
+ return addChild(Entry.getNameString(), IsNewNameString);
else
return addChild(Entry.getNameID());
}
@@ -232,7 +241,8 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild(
}
WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef) {
+WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef,
+ bool &IsNewString) {
std::string NameString;
ArrayRef<UTF16> CorrectedName;
std::vector<UTF16> EndianCorrectedName;
@@ -248,6 +258,7 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef) {
auto Child = StringChildren.find(NameString);
if (Child == StringChildren.end()) {
auto NewChild = createStringNode();
+ IsNewString = true;
WindowsResourceParser::TreeNode &Node = *NewChild;
StringChildren.emplace(NameString, std::move(NewChild));
return Node;
@@ -296,7 +307,6 @@ class WindowsResourceCOFFWriter {
public:
WindowsResourceCOFFWriter(StringRef OutputFile, Machine MachineType,
const WindowsResourceParser &Parser, Error &E);
-
Error write();
private:
@@ -314,7 +324,8 @@ private:
void writeDirectoryStringTable();
void writeFirstSectionRelocations();
std::unique_ptr<FileOutputBuffer> Buffer;
- uint8_t *Current;
+ uint8_t *BufferStart;
+ uint64_t CurrentOffset = 0;
Machine MachineType;
const WindowsResourceParser::TreeNode &Resources;
const ArrayRef<std::vector<uint8_t>> Data;
@@ -386,6 +397,7 @@ void WindowsResourceCOFFWriter::performSectionOneLayout() {
FileSize += SectionOneSize;
FileSize += Data.size() *
llvm::COFF::RelocationSize; // one relocation for each resource.
+ FileSize = alignTo(FileSize, SECTION_ALIGNMENT);
}
void WindowsResourceCOFFWriter::performSectionTwoLayout() {
@@ -398,6 +410,7 @@ void WindowsResourceCOFFWriter::performSectionTwoLayout() {
SectionTwoSize += llvm::alignTo(Entry.size(), sizeof(uint64_t));
}
FileSize += SectionTwoSize;
+ FileSize = alignTo(FileSize, SECTION_ALIGNMENT);
}
static std::time_t getTime() {
@@ -408,7 +421,7 @@ static std::time_t getTime() {
}
Error WindowsResourceCOFFWriter::write() {
- Current = Buffer->getBufferStart();
+ BufferStart = Buffer->getBufferStart();
writeCOFFHeader();
writeFirstSectionHeader();
@@ -427,7 +440,8 @@ Error WindowsResourceCOFFWriter::write() {
void WindowsResourceCOFFWriter::writeCOFFHeader() {
// Write the COFF header.
- auto *Header = reinterpret_cast<llvm::object::coff_file_header *>(Current);
+ auto *Header =
+ reinterpret_cast<llvm::object::coff_file_header *>(BufferStart);
switch (MachineType) {
case Machine::ARM:
Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT;
@@ -452,9 +466,9 @@ void WindowsResourceCOFFWriter::writeCOFFHeader() {
void WindowsResourceCOFFWriter::writeFirstSectionHeader() {
// Write the first section header.
- Current += sizeof(llvm::object::coff_file_header);
- auto *SectionOneHeader =
- reinterpret_cast<llvm::object::coff_section *>(Current);
+ CurrentOffset += sizeof(llvm::object::coff_file_header);
+ auto *SectionOneHeader = reinterpret_cast<llvm::object::coff_section *>(
+ BufferStart + CurrentOffset);
strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)llvm::COFF::NameSize);
SectionOneHeader->VirtualSize = 0;
SectionOneHeader->VirtualAddress = 0;
@@ -473,9 +487,9 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() {
void WindowsResourceCOFFWriter::writeSecondSectionHeader() {
// Write the second section header.
- Current += sizeof(llvm::object::coff_section);
- auto *SectionTwoHeader =
- reinterpret_cast<llvm::object::coff_section *>(Current);
+ CurrentOffset += sizeof(llvm::object::coff_section);
+ auto *SectionTwoHeader = reinterpret_cast<llvm::object::coff_section *>(
+ BufferStart + CurrentOffset);
strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)llvm::COFF::NameSize);
SectionTwoHeader->VirtualSize = 0;
SectionTwoHeader->VirtualAddress = 0;
@@ -492,75 +506,85 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() {
void WindowsResourceCOFFWriter::writeFirstSection() {
// Write section one.
- Current += sizeof(llvm::object::coff_section);
+ CurrentOffset += sizeof(llvm::object::coff_section);
writeDirectoryTree();
writeDirectoryStringTable();
writeFirstSectionRelocations();
+
+ CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT);
}
void WindowsResourceCOFFWriter::writeSecondSection() {
// Now write the .rsrc$02 section.
for (auto const &RawDataEntry : Data) {
- std::copy(RawDataEntry.begin(), RawDataEntry.end(), Current);
- Current += alignTo(RawDataEntry.size(), sizeof(uint64_t));
+ std::copy(RawDataEntry.begin(), RawDataEntry.end(),
+ BufferStart + CurrentOffset);
+ CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t));
}
+
+ CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT);
}
void WindowsResourceCOFFWriter::writeSymbolTable() {
// Now write the symbol table.
// First, the feat symbol.
- auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+ auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
+ CurrentOffset);
strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)llvm::COFF::NameSize);
Symbol->Value = 0x11;
Symbol->SectionNumber = 0xffff;
Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 0;
- Current += sizeof(llvm::object::coff_symbol16);
+ CurrentOffset += sizeof(llvm::object::coff_symbol16);
// Now write the .rsrc1 symbol + aux.
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+ Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
+ CurrentOffset);
strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)llvm::COFF::NameSize);
Symbol->Value = 0;
Symbol->SectionNumber = 1;
Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 1;
- Current += sizeof(llvm::object::coff_symbol16);
- auto *Aux =
- reinterpret_cast<llvm::object::coff_aux_section_definition *>(Current);
+ CurrentOffset += sizeof(llvm::object::coff_symbol16);
+ auto *Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(
+ BufferStart + CurrentOffset);
Aux->Length = SectionOneSize;
Aux->NumberOfRelocations = Data.size();
Aux->NumberOfLinenumbers = 0;
Aux->CheckSum = 0;
Aux->NumberLowPart = 0;
Aux->Selection = 0;
- Current += sizeof(llvm::object::coff_aux_section_definition);
+ CurrentOffset += sizeof(llvm::object::coff_aux_section_definition);
// Now write the .rsrc2 symbol + aux.
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+ Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
+ CurrentOffset);
strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)llvm::COFF::NameSize);
Symbol->Value = 0;
Symbol->SectionNumber = 2;
Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 1;
- Current += sizeof(llvm::object::coff_symbol16);
- Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(Current);
+ CurrentOffset += sizeof(llvm::object::coff_symbol16);
+ Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(
+ BufferStart + CurrentOffset);
Aux->Length = SectionTwoSize;
Aux->NumberOfRelocations = 0;
Aux->NumberOfLinenumbers = 0;
Aux->CheckSum = 0;
Aux->NumberLowPart = 0;
Aux->Selection = 0;
- Current += sizeof(llvm::object::coff_aux_section_definition);
+ CurrentOffset += sizeof(llvm::object::coff_aux_section_definition);
// Now write a symbol for each relocation.
for (unsigned i = 0; i < Data.size(); i++) {
char RelocationName[9];
sprintf(RelocationName, "$R%06X", DataOffsets[i]);
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+ Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
+ CurrentOffset);
strncpy(Symbol->Name.ShortName, RelocationName,
(size_t)llvm::COFF::NameSize);
Symbol->Value = DataOffsets[i];
@@ -568,14 +592,14 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 0;
- Current += sizeof(llvm::object::coff_symbol16);
+ CurrentOffset += sizeof(llvm::object::coff_symbol16);
}
}
void WindowsResourceCOFFWriter::writeStringTable() {
// Just 4 null bytes for the string table.
- auto COFFStringTable = reinterpret_cast<uint32_t *>(Current);
- *COFFStringTable = 0;
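+ // memset avoids a raw uint32_t store, which could land at an unaligned
+ // offset here.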
+ auto COFFStringTable = reinterpret_cast<void *>(BufferStart + CurrentOffset);
+ memset(COFFStringTable, 0, 4);
}
void WindowsResourceCOFFWriter::writeDirectoryTree() {
@@ -593,8 +617,8 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
while (!Queue.empty()) {
auto CurrentNode = Queue.front();
Queue.pop();
- auto *Table =
- reinterpret_cast<llvm::object::coff_resource_dir_table *>(Current);
+ auto *Table = reinterpret_cast<llvm::object::coff_resource_dir_table *>(
+ BufferStart + CurrentOffset);
Table->Characteristics = CurrentNode->getCharacteristics();
Table->TimeDateStamp = 0;
Table->MajorVersion = CurrentNode->getMajorVersion();
@@ -603,13 +627,13 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
auto &StringChildren = CurrentNode->getStringChildren();
Table->NumberOfNameEntries = StringChildren.size();
Table->NumberOfIDEntries = IDChildren.size();
- Current += sizeof(llvm::object::coff_resource_dir_table);
+ CurrentOffset += sizeof(llvm::object::coff_resource_dir_table);
CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_table);
// Write the directory entries immediately following each directory table.
for (auto const &Child : StringChildren) {
- auto *Entry =
- reinterpret_cast<llvm::object::coff_resource_dir_entry *>(Current);
+ auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>(
+ BufferStart + CurrentOffset);
Entry->Identifier.NameOffset =
StringTableOffsets[Child.second->getStringIndex()];
if (Child.second->checkIsDataNode()) {
@@ -624,12 +648,12 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
sizeof(llvm::object::coff_resource_dir_entry);
Queue.push(Child.second.get());
}
- Current += sizeof(llvm::object::coff_resource_dir_entry);
+ CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry);
CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry);
}
for (auto const &Child : IDChildren) {
- auto *Entry =
- reinterpret_cast<llvm::object::coff_resource_dir_entry *>(Current);
+ auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>(
+ BufferStart + CurrentOffset);
Entry->Identifier.ID = Child.first;
if (Child.second->checkIsDataNode()) {
Entry->Offset.DataEntryOffset = NextLevelOffset;
@@ -643,7 +667,7 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
sizeof(llvm::object::coff_resource_dir_entry);
Queue.push(Child.second.get());
}
- Current += sizeof(llvm::object::coff_resource_dir_entry);
+ CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry);
CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry);
}
}
@@ -651,14 +675,14 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
RelocationAddresses.resize(Data.size());
// Now write all the resource data entries.
for (auto DataNodes : DataEntriesTreeOrder) {
- auto *Entry =
- reinterpret_cast<llvm::object::coff_resource_data_entry *>(Current);
+ auto *Entry = reinterpret_cast<llvm::object::coff_resource_data_entry *>(
+ BufferStart + CurrentOffset);
RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset;
Entry->DataRVA = 0; // Set to zero because it is a relocation.
Entry->DataSize = Data[DataNodes->getDataIndex()].size();
Entry->Codepage = 0;
Entry->Reserved = 0;
- Current += sizeof(llvm::object::coff_resource_data_entry);
+ CurrentOffset += sizeof(llvm::object::coff_resource_data_entry);
CurrentRelativeOffset += sizeof(llvm::object::coff_resource_data_entry);
}
}
@@ -666,17 +690,16 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
void WindowsResourceCOFFWriter::writeDirectoryStringTable() {
// Now write the directory string table for .rsrc$01
uint32_t TotalStringTableSize = 0;
- for (auto String : StringTable) {
- auto *LengthField = reinterpret_cast<uint16_t *>(Current);
+ for (auto &String : StringTable) {
uint16_t Length = String.size();
- *LengthField = Length;
- Current += sizeof(uint16_t);
- auto *Start = reinterpret_cast<UTF16 *>(Current);
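+ // write16le emits the length byte-by-byte, making the store safe at
+ // unaligned offsets and on big-endian hosts.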
+ support::endian::write16le(BufferStart + CurrentOffset, Length);
+ CurrentOffset += sizeof(uint16_t);
+ auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset);
std::copy(String.begin(), String.end(), Start);
- Current += Length * sizeof(UTF16);
+ CurrentOffset += Length * sizeof(UTF16);
TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t);
}
- Current +=
+ CurrentOffset +=
alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize;
}
@@ -687,7 +710,8 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
// .rsrc section.
uint32_t NextSymbolIndex = 5;
for (unsigned i = 0; i < Data.size(); i++) {
- auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>(Current);
+ auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>(
+ BufferStart + CurrentOffset);
Reloc->VirtualAddress = RelocationAddresses[i];
Reloc->SymbolTableIndex = NextSymbolIndex++;
switch (MachineType) {
@@ -703,7 +727,7 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
default:
Reloc->Type = 0;
}
- Current += sizeof(llvm::object::coff_relocation);
+ CurrentOffset += sizeof(llvm::object::coff_relocation);
}
}
diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp
index 7f9f4c1f8c2c..c8cbea1490f6 100644
--- a/lib/ObjectYAML/COFFYAML.cpp
+++ b/lib/ObjectYAML/COFFYAML.cpp
@@ -488,7 +488,16 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
IO.mapOptional("VirtualAddress", Sec.Header.VirtualAddress, 0U);
IO.mapOptional("VirtualSize", Sec.Header.VirtualSize, 0U);
IO.mapOptional("Alignment", Sec.Alignment, 0U);
- IO.mapRequired("SectionData", Sec.SectionData);
+
+ // If this is a .debug$S or .debug$T section, parse the semantic
+ // representation of the symbols/types. If it is any other kind of section,
+ // just deal in raw bytes.
+ IO.mapOptional("SectionData", Sec.SectionData);
+ if (Sec.Name == ".debug$S")
+ IO.mapOptional("Subsections", Sec.DebugS);
+ else if (Sec.Name == ".debug$T")
+ IO.mapOptional("Types", Sec.DebugT);
+
IO.mapOptional("Relocations", Sec.Relocations);
}
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 08a4bb715fac..d194420d5ef4 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -28,6 +28,7 @@
#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"
@@ -75,10 +76,9 @@ struct YAMLSubsectionBase {
virtual ~YAMLSubsectionBase() {}
virtual void map(IO &IO) = 0;
- virtual std::unique_ptr<DebugSubsection>
+ virtual std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *UseStrings,
- DebugChecksumsSubsection *UseChecksums) const = 0;
+ const codeview::StringsAndChecksums &SC) const = 0;
};
}
}
@@ -90,10 +90,9 @@ struct YAMLChecksumsSubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLChecksumsSubsection>>
fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
const DebugChecksumsSubsectionRef &FC);
@@ -105,10 +104,9 @@ struct YAMLLinesSubsection : public YAMLSubsectionBase {
YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLLinesSubsection>>
fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
const DebugChecksumsSubsectionRef &Checksums,
@@ -122,10 +120,9 @@ struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLInlineeLinesSubsection>>
fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
const DebugChecksumsSubsectionRef &Checksums,
@@ -139,10 +136,9 @@ struct YAMLCrossModuleExportsSubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::CrossScopeExports) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLCrossModuleExportsSubsection>>
fromCodeViewSubsection(const DebugCrossModuleExportsSubsectionRef &Exports);
@@ -154,10 +150,9 @@ struct YAMLCrossModuleImportsSubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::CrossScopeImports) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLCrossModuleImportsSubsection>>
fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
const DebugCrossModuleImportsSubsectionRef &Imports);
@@ -169,10 +164,9 @@ struct YAMLSymbolsSubsection : public YAMLSubsectionBase {
YAMLSymbolsSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Symbols) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLSymbolsSubsection>>
fromCodeViewSubsection(const DebugSymbolsSubsectionRef &Symbols);
@@ -184,10 +178,9 @@ struct YAMLStringTableSubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::StringTable) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLStringTableSubsection>>
fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings);
@@ -199,10 +192,9 @@ struct YAMLFrameDataSubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::FrameData) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLFrameDataSubsection>>
fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
const DebugFrameDataSubsectionRef &Frames);
@@ -215,10 +207,9 @@ struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase {
: YAMLSubsectionBase(DebugSubsectionKind::CoffSymbolRVA) {}
void map(IO &IO) override;
- std::unique_ptr<DebugSubsection>
+ std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
- DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const override;
+ const codeview::StringsAndChecksums &SC) const override;
static Expected<std::shared_ptr<YAMLCoffSymbolRVASubsection>>
fromCodeViewSubsection(const DebugSymbolRVASubsectionRef &RVAs);
@@ -389,34 +380,23 @@ void MappingTraits<YAMLDebugSubsection>::mapping(
Subsection.Subsection->map(IO);
}
-static std::shared_ptr<YAMLChecksumsSubsection>
-findChecksums(ArrayRef<YAMLDebugSubsection> Subsections) {
- for (const auto &SS : Subsections) {
- if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) {
- return std::static_pointer_cast<YAMLChecksumsSubsection>(SS.Subsection);
- }
- }
-
- return nullptr;
-}
-
-std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings,
- DebugChecksumsSubsection *UseChecksums) const {
- assert(UseStrings && !UseChecksums);
- auto Result = llvm::make_unique<DebugChecksumsSubsection>(*UseStrings);
+std::shared_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection(
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ assert(SC.hasStrings());
+ auto Result = std::make_shared<DebugChecksumsSubsection>(*SC.strings());
for (const auto &CS : Checksums) {
Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes);
}
- return std::move(Result);
+ return Result;
}
-std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings,
- DebugChecksumsSubsection *UseChecksums) const {
- assert(UseStrings && UseChecksums);
+std::shared_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ assert(SC.hasStrings() && SC.hasChecksums());
auto Result =
- llvm::make_unique<DebugLinesSubsection>(*UseChecksums, *UseStrings);
+ std::make_shared<DebugLinesSubsection>(*SC.checksums(), *SC.strings());
Result->setCodeSize(Lines.CodeSize);
Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset);
Result->setFlags(Lines.Flags);
@@ -438,16 +418,16 @@ std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
}
}
}
- return llvm::cast<DebugSubsection>(std::move(Result));
+ return Result;
}
-std::unique_ptr<DebugSubsection>
+std::shared_ptr<DebugSubsection>
YAMLInlineeLinesSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings,
- DebugChecksumsSubsection *UseChecksums) const {
- assert(UseChecksums);
- auto Result = llvm::make_unique<DebugInlineeLinesSubsection>(
- *UseChecksums, InlineeLines.HasExtraFiles);
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ assert(SC.hasChecksums());
+ auto Result = std::make_shared<DebugInlineeLinesSubsection>(
+ *SC.checksums(), InlineeLines.HasExtraFiles);
for (const auto &Site : InlineeLines.Sites) {
Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName,
@@ -459,56 +439,60 @@ YAMLInlineeLinesSubsection::toCodeViewSubsection(
Result->addExtraFile(EF);
}
}
- return llvm::cast<DebugSubsection>(std::move(Result));
+ return Result;
}
-std::unique_ptr<DebugSubsection>
+std::shared_ptr<DebugSubsection>
YAMLCrossModuleExportsSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const {
- auto Result = llvm::make_unique<DebugCrossModuleExportsSubsection>();
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ auto Result = std::make_shared<DebugCrossModuleExportsSubsection>();
for (const auto &M : Exports)
Result->addMapping(M.Local, M.Global);
- return llvm::cast<DebugSubsection>(std::move(Result));
+ return Result;
}
-std::unique_ptr<DebugSubsection>
+std::shared_ptr<DebugSubsection>
YAMLCrossModuleImportsSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const {
- auto Result = llvm::make_unique<DebugCrossModuleImportsSubsection>(*Strings);
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ assert(SC.hasStrings());
+
+ auto Result =
+ std::make_shared<DebugCrossModuleImportsSubsection>(*SC.strings());
for (const auto &M : Imports) {
for (const auto Id : M.ImportIds)
Result->addImport(M.ModuleName, Id);
}
- return llvm::cast<DebugSubsection>(std::move(Result));
+ return Result;
}
-std::unique_ptr<DebugSubsection> YAMLSymbolsSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const {
- auto Result = llvm::make_unique<DebugSymbolsSubsection>();
+std::shared_ptr<DebugSubsection> YAMLSymbolsSubsection::toCodeViewSubsection(
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ auto Result = std::make_shared<DebugSymbolsSubsection>();
for (const auto &Sym : Symbols)
Result->addSymbol(
Sym.toCodeViewSymbol(Allocator, CodeViewContainer::ObjectFile));
- return std::move(Result);
+ return Result;
}
-std::unique_ptr<DebugSubsection>
+std::shared_ptr<DebugSubsection>
YAMLStringTableSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const {
- auto Result = llvm::make_unique<DebugStringTableSubsection>();
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ auto Result = std::make_shared<DebugStringTableSubsection>();
for (const auto &Str : this->Strings)
Result->insert(Str);
- return std::move(Result);
+ return Result;
}
-std::unique_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const {
- assert(Strings);
- auto Result = llvm::make_unique<DebugFrameDataSubsection>();
+std::shared_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection(
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ assert(SC.hasStrings());
+
+ auto Result = std::make_shared<DebugFrameDataSubsection>();
for (const auto &YF : Frames) {
codeview::FrameData F;
F.CodeSize = YF.CodeSize;
@@ -519,20 +503,20 @@ std::unique_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection(
F.PrologSize = YF.PrologSize;
F.RvaStart = YF.RvaStart;
F.SavedRegsSize = YF.SavedRegsSize;
- F.FrameFunc = Strings->insert(YF.FrameFunc);
+ F.FrameFunc = SC.strings()->insert(YF.FrameFunc);
Result->addFrameData(F);
}
- return std::move(Result);
+ return Result;
}
-std::unique_ptr<DebugSubsection>
+std::shared_ptr<DebugSubsection>
YAMLCoffSymbolRVASubsection::toCodeViewSubsection(
- BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
- DebugChecksumsSubsection *Checksums) const {
- auto Result = llvm::make_unique<DebugSymbolRVASubsection>();
+ BumpPtrAllocator &Allocator,
+ const codeview::StringsAndChecksums &SC) const {
+ auto Result = std::make_shared<DebugSymbolRVASubsection>();
for (const auto &RVA : RVAs)
Result->addRVA(RVA);
- return std::move(Result);
+ return Result;
}
static Expected<SourceFileChecksumEntry>
@@ -741,63 +725,17 @@ YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(
return Result;
}
-Expected<std::vector<std::unique_ptr<DebugSubsection>>>
+Expected<std::vector<std::shared_ptr<DebugSubsection>>>
llvm::CodeViewYAML::toCodeViewSubsectionList(
BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections,
- DebugStringTableSubsection &Strings) {
- std::vector<std::unique_ptr<DebugSubsection>> Result;
+ const codeview::StringsAndChecksums &SC) {
+ std::vector<std::shared_ptr<DebugSubsection>> Result;
if (Subsections.empty())
return std::move(Result);
- auto Checksums = findChecksums(Subsections);
- std::unique_ptr<DebugSubsection> ChecksumsBase;
- if (Checksums)
- ChecksumsBase =
- Checksums->toCodeViewSubsection(Allocator, &Strings, nullptr);
- DebugChecksumsSubsection *CS =
- static_cast<DebugChecksumsSubsection *>(ChecksumsBase.get());
for (const auto &SS : Subsections) {
- // We've already converted the checksums subsection, don't do it
- // twice.
- std::unique_ptr<DebugSubsection> CVS;
- if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums)
- CVS = std::move(ChecksumsBase);
- else
- CVS = SS.Subsection->toCodeViewSubsection(Allocator, &Strings, CS);
- assert(CVS != nullptr);
- Result.push_back(std::move(CVS));
- }
- return std::move(Result);
-}
-
-Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>>
-llvm::CodeViewYAML::toCodeViewSubsectionList(
- BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections,
- std::unique_ptr<DebugStringTableSubsection> &TakeStrings,
- DebugStringTableSubsection *StringsRef) {
- std::vector<std::unique_ptr<DebugSubsection>> Result;
- if (Subsections.empty())
- return std::move(Result);
-
- auto Checksums = findChecksums(Subsections);
-
- std::unique_ptr<DebugSubsection> ChecksumsBase;
- if (Checksums)
- ChecksumsBase =
- Checksums->toCodeViewSubsection(Allocator, StringsRef, nullptr);
- DebugChecksumsSubsection *CS =
- static_cast<DebugChecksumsSubsection *>(ChecksumsBase.get());
- for (const auto &SS : Subsections) {
- // We've already converted the checksums and string table subsection, don't
- // do it twice.
- std::unique_ptr<DebugSubsection> CVS;
- if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums)
- CVS = std::move(ChecksumsBase);
- else if (SS.Subsection->Kind == DebugSubsectionKind::StringTable) {
- assert(TakeStrings && "No string table!");
- CVS = std::move(TakeStrings);
- } else
- CVS = SS.Subsection->toCodeViewSubsection(Allocator, StringsRef, CS);
+ std::shared_ptr<DebugSubsection> CVS =
+ SS.Subsection->toCodeViewSubsection(Allocator, SC);
assert(CVS != nullptr);
Result.push_back(std::move(CVS));
}
@@ -810,23 +748,23 @@ struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override;
Error visitLines(DebugLinesSubsectionRef &Lines,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &Checksums,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &Inlinees,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitStringTable(DebugStringTableSubsectionRef &ST,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitSymbols(DebugSymbolsSubsectionRef &Symbols,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitFrameData(DebugFrameDataSubsectionRef &Symbols,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &Symbols,
- const DebugSubsectionState &State) override;
+ const StringsAndChecksumsRef &State) override;
YAMLDebugSubsection Subsection;
};
@@ -837,7 +775,7 @@ Error SubsectionConversionVisitor::visitUnknown(
}
Error SubsectionConversionVisitor::visitLines(
- DebugLinesSubsectionRef &Lines, const DebugSubsectionState &State) {
+ DebugLinesSubsectionRef &Lines, const StringsAndChecksumsRef &State) {
auto Result = YAMLLinesSubsection::fromCodeViewSubsection(
State.strings(), State.checksums(), Lines);
if (!Result)
@@ -847,7 +785,8 @@ Error SubsectionConversionVisitor::visitLines(
}
Error SubsectionConversionVisitor::visitFileChecksums(
- DebugChecksumsSubsectionRef &Checksums, const DebugSubsectionState &State) {
+ DebugChecksumsSubsectionRef &Checksums,
+ const StringsAndChecksumsRef &State) {
auto Result = YAMLChecksumsSubsection::fromCodeViewSubsection(State.strings(),
Checksums);
if (!Result)
@@ -858,7 +797,7 @@ Error SubsectionConversionVisitor::visitFileChecksums(
Error SubsectionConversionVisitor::visitInlineeLines(
DebugInlineeLinesSubsectionRef &Inlinees,
- const DebugSubsectionState &State) {
+ const StringsAndChecksumsRef &State) {
auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection(
State.strings(), State.checksums(), Inlinees);
if (!Result)
@@ -869,7 +808,7 @@ Error SubsectionConversionVisitor::visitInlineeLines(
Error SubsectionConversionVisitor::visitCrossModuleExports(
DebugCrossModuleExportsSubsectionRef &Exports,
- const DebugSubsectionState &State) {
+ const StringsAndChecksumsRef &State) {
auto Result =
YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(Exports);
if (!Result)
@@ -880,7 +819,7 @@ Error SubsectionConversionVisitor::visitCrossModuleExports(
Error SubsectionConversionVisitor::visitCrossModuleImports(
DebugCrossModuleImportsSubsectionRef &Imports,
- const DebugSubsectionState &State) {
+ const StringsAndChecksumsRef &State) {
auto Result = YAMLCrossModuleImportsSubsection::fromCodeViewSubsection(
State.strings(), Imports);
if (!Result)
@@ -890,7 +829,8 @@ Error SubsectionConversionVisitor::visitCrossModuleImports(
}
Error SubsectionConversionVisitor::visitStringTable(
- DebugStringTableSubsectionRef &Strings, const DebugSubsectionState &State) {
+ DebugStringTableSubsectionRef &Strings,
+ const StringsAndChecksumsRef &State) {
auto Result = YAMLStringTableSubsection::fromCodeViewSubsection(Strings);
if (!Result)
return Result.takeError();
@@ -899,7 +839,7 @@ Error SubsectionConversionVisitor::visitStringTable(
}
Error SubsectionConversionVisitor::visitSymbols(
- DebugSymbolsSubsectionRef &Symbols, const DebugSubsectionState &State) {
+ DebugSymbolsSubsectionRef &Symbols, const StringsAndChecksumsRef &State) {
auto Result = YAMLSymbolsSubsection::fromCodeViewSubsection(Symbols);
if (!Result)
return Result.takeError();
@@ -908,7 +848,7 @@ Error SubsectionConversionVisitor::visitSymbols(
}
Error SubsectionConversionVisitor::visitFrameData(
- DebugFrameDataSubsectionRef &Frames, const DebugSubsectionState &State) {
+ DebugFrameDataSubsectionRef &Frames, const StringsAndChecksumsRef &State) {
auto Result =
YAMLFrameDataSubsection::fromCodeViewSubsection(State.strings(), Frames);
if (!Result)
@@ -918,7 +858,7 @@ Error SubsectionConversionVisitor::visitFrameData(
}
Error SubsectionConversionVisitor::visitCOFFSymbolRVAs(
- DebugSymbolRVASubsectionRef &RVAs, const DebugSubsectionState &State) {
+ DebugSymbolRVASubsectionRef &RVAs, const StringsAndChecksumsRef &State) {
auto Result = YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(RVAs);
if (!Result)
return Result.takeError();
@@ -927,29 +867,71 @@ Error SubsectionConversionVisitor::visitCOFFSymbolRVAs(
}
}
-Expected<YAMLDebugSubsection> YAMLDebugSubsection::fromCodeViewSubection(
- const DebugStringTableSubsectionRef &Strings,
- const DebugChecksumsSubsectionRef &Checksums,
- const DebugSubsectionRecord &SS) {
- DebugSubsectionState State(Strings, Checksums);
+Expected<YAMLDebugSubsection>
+YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC,
+ const DebugSubsectionRecord &SS) {
SubsectionConversionVisitor V;
- if (auto EC = visitDebugSubsection(SS, V, State))
+ if (auto EC = visitDebugSubsection(SS, V, SC))
return std::move(EC);
return V.Subsection;
}
-std::unique_ptr<DebugStringTableSubsection>
-llvm::CodeViewYAML::findStringTable(ArrayRef<YAMLDebugSubsection> Sections) {
- for (const auto &SS : Sections) {
- if (SS.Subsection->Kind != DebugSubsectionKind::StringTable)
- continue;
+std::vector<YAMLDebugSubsection>
+llvm::CodeViewYAML::fromDebugS(ArrayRef<uint8_t> Data,
+ const StringsAndChecksumsRef &SC) {
+ BinaryStreamReader Reader(Data, support::little);
+ uint32_t Magic;
+
+ ExitOnError Err("Invalid .debug$S section!");
+ Err(Reader.readInteger(Magic));
+ assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$S section!");
+
+ DebugSubsectionArray Subsections;
+ Err(Reader.readArray(Subsections, Reader.bytesRemaining()));
+
+ std::vector<YAMLDebugSubsection> Result;
- // String Table doesn't use the allocator.
- BumpPtrAllocator Allocator;
- auto Result =
- SS.Subsection->toCodeViewSubsection(Allocator, nullptr, nullptr);
- return llvm::cast<DebugStringTableSubsection>(std::move(Result));
+ for (const auto &SS : Subsections) {
+ auto YamlSS = Err(YAMLDebugSubsection::fromCodeViewSubection(SC, SS));
+ Result.push_back(YamlSS);
+ }
+ return Result;
+}
+
+void llvm::CodeViewYAML::initializeStringsAndChecksums(
+ ArrayRef<YAMLDebugSubsection> Sections, codeview::StringsAndChecksums &SC) {
+ // String Table and Checksums subsections don't use the allocator.
+ BumpPtrAllocator Allocator;
+
+ // Checksums and strings can even appear in different debug$S sections, so
+ // this has to be a stateful function that builds up the strings and
+ // checksums fields over multiple calls.
+
+ // File Checksums require the string table, but may come before it, so we
+ // have to scan for strings first, then scan for checksums again from the
+ // beginning.
+ if (!SC.hasStrings()) {
+ for (const auto &SS : Sections) {
+ if (SS.Subsection->Kind != DebugSubsectionKind::StringTable)
+ continue;
+
+ auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC);
+ SC.setStrings(
+ std::static_pointer_cast<DebugStringTableSubsection>(Result));
+ break;
+ }
+ }
+
+ if (SC.hasStrings() && !SC.hasChecksums()) {
+ for (const auto &SS : Sections) {
+ if (SS.Subsection->Kind != DebugSubsectionKind::FileChecksums)
+ continue;
+
+ auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC);
+ SC.setChecksums(
+ std::static_pointer_cast<DebugChecksumsSubsection>(Result));
+ break;
+ }
}
- return nullptr;
}
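
For orientation, here is a minimal sketch of how the two new entry points compose. The wrapper function is hypothetical, and how SCRef gets seeded with a string table is outside this patch:

#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h"

// Hypothetical caller, not part of this patch: convert one .debug$S
// payload into YAML subsections, then accumulate the owning string
// table and checksums for later round-tripping.
void convertDebugS(llvm::ArrayRef<uint8_t> DebugS,
                   const llvm::codeview::StringsAndChecksumsRef &SCRef,
                   llvm::codeview::StringsAndChecksums &SC) {
  using namespace llvm::CodeViewYAML;
  // Reads the section magic, walks the subsection array, and converts
  // each record (exiting on malformed input, as in the code above).
  std::vector<YAMLDebugSubsection> Subsections = fromDebugS(DebugS, SCRef);
  // Stateful: may be called once per section until both the strings and
  // the checksums subsections have been found.
  initializeStringsAndChecksums(Subsections, SC);
}
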
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index fa3f1e0b60aa..ba3a2abe2097 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -35,6 +35,7 @@ LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false)
LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false)
LLVM_YAML_DECLARE_ENUM_TRAITS(SymbolKind)
+LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind)
LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym2Flags)
LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym3Flags)
@@ -149,6 +150,15 @@ void ScalarEnumerationTraits<ThunkOrdinal>::enumeration(IO &io,
}
}
+void ScalarEnumerationTraits<FrameCookieKind>::enumeration(
+ IO &io, FrameCookieKind &FC) {
+ auto ThunkNames = getFrameCookieKindNames();
+ for (const auto &E : ThunkNames) {
+ io.enumCase(FC, E.Name.str().c_str(),
+ static_cast<FrameCookieKind>(E.Value));
+ }
+}
+
namespace llvm {
namespace CodeViewYAML {
namespace detail {
@@ -183,8 +193,47 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase {
mutable T Symbol;
};
+struct UnknownSymbolRecord : public SymbolRecordBase {
+ explicit UnknownSymbolRecord(codeview::SymbolKind K) : SymbolRecordBase(K) {}
+
+ void map(yaml::IO &io) override;
+
+ CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container) const override {
+ RecordPrefix Prefix;
+ uint32_t TotalLen = sizeof(RecordPrefix) + Data.size();
+ Prefix.RecordKind = Kind;
+ Prefix.RecordLen = TotalLen - 2;
+ uint8_t *Buffer = Allocator.Allocate<uint8_t>(TotalLen);
+ ::memcpy(Buffer, &Prefix, sizeof(RecordPrefix));
+ ::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size());
+ return CVSymbol(Kind, ArrayRef<uint8_t>(Buffer, TotalLen));
+ }
+ Error fromCodeViewSymbol(CVSymbol CVS) override {
+ this->Kind = CVS.kind();
+ Data = CVS.RecordData.drop_front(sizeof(RecordPrefix));
+ return Error::success();
+ }
+
+ std::vector<uint8_t> Data;
+};
+
template <> void SymbolRecordImpl<ScopeEndSym>::map(IO &IO) {}
+void UnknownSymbolRecord::map(yaml::IO &io) {
+ yaml::BinaryRef Binary;
+ if (io.outputting())
+ Binary = yaml::BinaryRef(Data);
+ io.mapRequired("Data", Binary);
+ if (!io.outputting()) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ Binary.writeAsBinary(OS);
+ OS.flush();
+ Data.assign(Str.begin(), Str.end());
+ }
+}
+
template <> void SymbolRecordImpl<Thunk32Sym>::map(IO &IO) {
IO.mapRequired("Parent", Symbol.Parent);
IO.mapRequired("End", Symbol.End);
@@ -461,7 +510,7 @@ static inline Expected<CodeViewYAML::SymbolRecord>
fromCodeViewSymbolImpl(CVSymbol Symbol) {
CodeViewYAML::SymbolRecord Result;
- auto Impl = std::make_shared<SymbolRecordImpl<SymbolType>>(Symbol.kind());
+ auto Impl = std::make_shared<SymbolType>(Symbol.kind());
if (auto EC = Impl->fromCodeViewSymbol(Symbol))
return std::move(EC);
Result.Symbol = Impl;
@@ -472,12 +521,13 @@ Expected<CodeViewYAML::SymbolRecord>
CodeViewYAML::SymbolRecord::fromCodeViewSymbol(CVSymbol Symbol) {
#define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \
case EnumName: \
- return fromCodeViewSymbolImpl<ClassName>(Symbol);
+ return fromCodeViewSymbolImpl<SymbolRecordImpl<ClassName>>(Symbol);
#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \
SYMBOL_RECORD(EnumName, EnumVal, ClassName)
switch (Symbol.kind()) {
#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
- default: { llvm_unreachable("Unknown symbol kind!"); }
+ default:
+ return fromCodeViewSymbolImpl<UnknownSymbolRecord>(Symbol);
}
return make_error<CodeViewError>(cv_error_code::corrupt_record);
}
@@ -486,7 +536,7 @@ template <typename ConcreteType>
static void mapSymbolRecordImpl(IO &IO, const char *Class, SymbolKind Kind,
CodeViewYAML::SymbolRecord &Obj) {
if (!IO.outputting())
- Obj.Symbol = std::make_shared<SymbolRecordImpl<ConcreteType>>(Kind);
+ Obj.Symbol = std::make_shared<ConcreteType>(Kind);
IO.mapRequired(Class, *Obj.Symbol);
}
@@ -500,12 +550,14 @@ void MappingTraits<CodeViewYAML::SymbolRecord>::mapping(
#define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \
case EnumName: \
- mapSymbolRecordImpl<ClassName>(IO, #ClassName, Kind, Obj); \
+ mapSymbolRecordImpl<SymbolRecordImpl<ClassName>>(IO, #ClassName, Kind, \
+ Obj); \
break;
#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \
SYMBOL_RECORD(EnumName, EnumVal, ClassName)
switch (Kind) {
#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
- default: { llvm_unreachable("Unknown symbol kind!"); }
+ default:
+ mapSymbolRecordImpl<UnknownSymbolRecord>(IO, "UnknownSym", Kind, Obj);
}
}
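
With the UnknownSymbolRecord fallback in place, callers no longer need to guard against unrecognized symbol kinds; a small sketch (the wrapper itself is hypothetical):

#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"

// Hypothetical wrapper, not part of this patch: an unrecognized
// SymbolKind now round-trips through UnknownSymbolRecord as raw bytes
// (a "Data" key in the YAML) instead of hitting llvm_unreachable.
llvm::Expected<llvm::CodeViewYAML::SymbolRecord>
parseAnySymbol(llvm::codeview::CVSymbol Sym) {
  return llvm::CodeViewYAML::SymbolRecord::fromCodeViewSymbol(Sym);
}
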
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 1302b0713d0e..a03b9cd50faa 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -714,3 +714,43 @@ void MappingTraits<MemberRecord>::mapping(IO &IO, MemberRecord &Obj) {
default: { llvm_unreachable("Unknown member kind!"); }
}
}
+
+std::vector<LeafRecord>
+llvm::CodeViewYAML::fromDebugT(ArrayRef<uint8_t> DebugT) {
+ ExitOnError Err("Invalid .debug$T section!");
+ BinaryStreamReader Reader(DebugT, support::little);
+ CVTypeArray Types;
+ uint32_t Magic;
+
+ Err(Reader.readInteger(Magic));
+ assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$T section!");
+
+ std::vector<LeafRecord> Result;
+ Err(Reader.readArray(Types, Reader.bytesRemaining()));
+ for (const auto &T : Types) {
+ auto CVT = Err(LeafRecord::fromCodeViewRecord(T));
+ Result.push_back(CVT);
+ }
+ return Result;
+}
+
+ArrayRef<uint8_t> llvm::CodeViewYAML::toDebugT(ArrayRef<LeafRecord> Leafs,
+ BumpPtrAllocator &Alloc) {
+ TypeTableBuilder TTB(Alloc, false);
+ uint32_t Size = sizeof(uint32_t);
+ for (const auto &Leaf : Leafs) {
+ CVType T = Leaf.toCodeViewRecord(TTB);
+ Size += T.length();
+ assert(T.length() % 4 == 0 && "Improper type record alignment!");
+ }
+ uint8_t *ResultBuffer = Alloc.Allocate<uint8_t>(Size);
+ MutableArrayRef<uint8_t> Output(ResultBuffer, Size);
+ BinaryStreamWriter Writer(Output, support::little);
+ ExitOnError Err("Error writing type record to .debug$T section");
+ Err(Writer.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC));
+ for (const auto &R : TTB.records()) {
+ Err(Writer.writeBytes(R));
+ }
+ assert(Writer.bytesRemaining() == 0 && "Didn't write all type record bytes!");
+ return Output;
+}
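
Taken together, the two new helpers form a round trip; a minimal sketch under the signatures added above (the function itself is hypothetical):

#include "llvm/ObjectYAML/CodeViewYAMLTypes.h"

// Hypothetical round trip, not part of this patch: .debug$T bytes ->
// YAML leaf records -> freshly serialized .debug$T bytes.
llvm::ArrayRef<uint8_t> roundTripDebugT(llvm::ArrayRef<uint8_t> DebugT,
                                        llvm::BumpPtrAllocator &Alloc) {
  // fromDebugT exits on a bad section magic; toDebugT re-emits the
  // magic followed by 4-byte-aligned type records.
  std::vector<llvm::CodeViewYAML::LeafRecord> Leafs =
      llvm::CodeViewYAML::fromDebugT(DebugT);
  return llvm::CodeViewYAML::toDebugT(Leafs, Alloc);
}
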
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index 70e25ea504a0..dbd5498e003d 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -372,6 +372,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_GROUP);
ECase(SHT_SYMTAB_SHNDX);
ECase(SHT_LOOS);
+ ECase(SHT_LLVM_ODRTAB);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
ECase(SHT_GNU_verdef);
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
index e416df6a38dc..e581fee8bf38 100644
--- a/lib/Option/Arg.cpp
+++ b/lib/Option/Arg.cpp
@@ -1,4 +1,4 @@
-//===--- Arg.cpp - Argument Implementations -------------------------------===//
+//===- Arg.cpp - Argument Implementations ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Option/Arg.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -67,7 +67,7 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); }
std::string Arg::getAsString(const ArgList &Args) const {
SmallString<256> Res;
- llvm::raw_svector_ostream OS(Res);
+ raw_svector_ostream OS(Res);
ArgStringList ASL;
render(Args, ASL);
@@ -98,7 +98,7 @@ void Arg::render(const ArgList &Args, ArgStringList &Output) const {
case Option::RenderCommaJoinedStyle: {
SmallString<256> Res;
- llvm::raw_svector_ostream OS(Res);
+ raw_svector_ostream OS(Res);
OS << getSpelling();
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
if (i) OS << ',';
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index 39dbce87f9ae..cbccc1935d3c 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -1,4 +1,4 @@
-//===--- ArgList.cpp - Argument List Management ---------------------------===//
+//===- ArgList.cpp - Argument List Management -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Option/ArgList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
+#include "llvm/Option/OptSpecifier.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::opt;
@@ -197,8 +208,6 @@ void ArgList::print(raw_ostream &O) const {
LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); }
#endif
-//
-
void InputArgList::releaseMemory() {
// An InputArgList always owns its arguments.
for (Arg *A : *this)
@@ -234,8 +243,6 @@ const char *InputArgList::MakeArgStringRef(StringRef Str) const {
return getArgString(MakeIndex(Str));
}
-//
-
DerivedArgList::DerivedArgList(const InputArgList &BaseArgs)
: BaseArgs(BaseArgs) {}
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index b00d21ec8f67..52a81ff0e159 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -1,4 +1,4 @@
-//===--- OptTable.cpp - Option Table Implementation -----------------------===//
+//===- OptTable.cpp - Option Table Implementation -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,16 +7,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Option/OptTable.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
+#include "llvm/Option/OptSpecifier.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
#include <cctype>
+#include <cstring>
#include <map>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::opt;
@@ -80,14 +89,14 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
static inline bool operator<(const OptTable::Info &I, const char *Name) {
return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0;
}
-}
-}
+
+} // end namespace opt
+} // end namespace llvm
OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
- : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0),
- TheUnknownOptionID(0), FirstSearchableIndex(0) {
+ : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) {
// Explicitly zero initialize the error to work around a bug in array
// value-initialization on MinGW with gcc 4.3.5.
@@ -138,8 +147,8 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
}
// Build prefix chars.
- for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(),
- E = PrefixesUnion.end(); I != E; ++I) {
+ for (StringSet<>::const_iterator I = PrefixesUnion.begin(),
+ E = PrefixesUnion.end(); I != E; ++I) {
StringRef Prefix = I->getKey();
for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
C != CE; ++C)
@@ -148,8 +157,7 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
}
}
-OptTable::~OptTable() {
-}
+OptTable::~OptTable() = default;
const Option OptTable::getOption(OptSpecifier Opt) const {
unsigned id = Opt.getID();
@@ -159,11 +167,11 @@ const Option OptTable::getOption(OptSpecifier Opt) const {
return Option(&getInfo(id), this);
}
-static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
+static bool isInput(const StringSet<> &Prefixes, StringRef Arg) {
if (Arg == "-")
return true;
- for (llvm::StringSet<>::const_iterator I = Prefixes.begin(),
- E = Prefixes.end(); I != E; ++I)
+ for (StringSet<>::const_iterator I = Prefixes.begin(),
+ E = Prefixes.end(); I != E; ++I)
if (Arg.startswith(I->getKey()))
return false;
return true;
@@ -346,7 +354,7 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
std::vector<std::pair<std::string,
- const char*> > &OptionHelp) {
+ const char*>> &OptionHelp) {
OS << Title << ":\n";
// Find the maximum option length.
@@ -412,8 +420,8 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
// Render help text into a map of group-name to a list of (option, help)
// pairs.
- typedef std::map<std::string,
- std::vector<std::pair<std::string, const char*> > > helpmap_ty;
+ using helpmap_ty =
+ std::map<std::string, std::vector<std::pair<std::string, const char*>>>;
helpmap_ty GroupedOptionHelp;
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 736b939fe80b..4832e659f026 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -1,4 +1,4 @@
-//===--- Option.cpp - Abstract Driver Options -----------------------------===//
+//===- Option.cpp - Abstract Driver Options -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,22 +7,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Option/Option.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
+#include <cstring>
using namespace llvm;
using namespace llvm::opt;
Option::Option(const OptTable::Info *info, const OptTable *owner)
: Info(info), Owner(owner) {
-
// Multi-level aliases are not supported. This just simplifies option
// tracking; it is not an inherent limitation.
assert((!Info || !getAlias().isValid() || !getAlias().getAlias().isValid()) &&
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 1f638e768307..afd66f55720a 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -160,6 +160,10 @@ static cl::opt<bool>
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run NewGVN instead of GVN"));
+static cl::opt<bool> EnableEarlyCSEMemSSA(
+ "enable-npm-earlycse-memssa", cl::init(false), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = off)"));
+
static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
@@ -312,7 +316,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(SROA());
// Catch trivial redundancies
- FPM.addPass(EarlyCSEPass());
+ FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA));
// Hoisting of scalars and load expressions.
if (EnableGVNHoist)
diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp
index b22eb1ed12d0..c4276518b191 100644
--- a/lib/Support/BinaryStreamWriter.cpp
+++ b/lib/Support/BinaryStreamWriter.cpp
@@ -83,6 +83,8 @@ Error BinaryStreamWriter::padToAlignment(uint32_t Align) {
uint32_t NewOffset = alignTo(Offset, Align);
if (NewOffset > getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
- Offset = NewOffset;
+ while (Offset < NewOffset)
+ if (auto EC = writeInteger('\0'))
+ return EC;
return Error::success();
}
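
The behavioral change here is easy to miss: padding bytes are now written out as zeros rather than skipped over, so stale bytes in the underlying stream can no longer leak through. A usage sketch (the helper is hypothetical):

#include "llvm/Support/BinaryStreamWriter.h"

// Hypothetical helper, not part of this patch: after the change,
// padToAlignment() emits '\0' up to the boundary instead of just
// bumping the write offset.
llvm::Error writeAligned(llvm::BinaryStreamWriter &W, uint32_t Value) {
  if (auto EC = W.padToAlignment(4))
    return EC;
  return W.writeInteger(Value);
}
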
diff --git a/lib/Support/DebugCounter.cpp b/lib/Support/DebugCounter.cpp
index a10ac8e85396..1d46de04ee6a 100644
--- a/lib/Support/DebugCounter.cpp
+++ b/lib/Support/DebugCounter.cpp
@@ -102,9 +102,13 @@ void DebugCounter::push_back(const std::string &Val) {
}
}
-void DebugCounter::print(raw_ostream &OS) {
+void DebugCounter::print(raw_ostream &OS) const {
OS << "Counters and values:\n";
for (const auto &KV : Counters)
OS << left_justify(RegisteredCounters[KV.first], 32) << ": {"
<< KV.second.first << "," << KV.second.second << "}\n";
}
+
+LLVM_DUMP_METHOD void DebugCounter::dump() const {
+ print(dbgs());
+}
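
A usage sketch (the caller is hypothetical): with print() now const, a const reference suffices, and dump() gives debuggers a convenient entry point:

#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical caller, not part of this patch.
void reportCounters(const llvm::DebugCounter &DC) {
  DC.print(llvm::errs()); // or DC.dump() to dbgs() under a debugger
}
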
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index c9bca7f4c1ab..4496d06a15f3 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
// FoldingSetNodeIDRef Implementation
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
-/// used to lookup the node in the FoldingSetImpl.
+/// used to lookup the node in the FoldingSetBase.
unsigned FoldingSetNodeIDRef::ComputeHash() const {
return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
}
@@ -142,7 +142,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
}
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
-/// lookup the node in the FoldingSetImpl.
+/// lookup the node in the FoldingSetBase.
unsigned FoldingSetNodeID::ComputeHash() const {
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
}
@@ -180,7 +180,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
}
//===----------------------------------------------------------------------===//
-/// Helper functions for FoldingSetImpl.
+/// Helper functions for FoldingSetBase.
/// GetNextPtr - In order to save space, each bucket is a
/// singly-linked-list. In order to make deletion more efficient, we make
@@ -188,12 +188,12 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
/// The problem with this is that the start of the hash buckets are not
/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
/// use GetBucketPtr when this happens.
-static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
+static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
// The low bit is set if this is the pointer back to the bucket.
if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
return nullptr;
- return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
+ return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
}
@@ -221,11 +221,11 @@ static void **AllocateBuckets(unsigned NumBuckets) {
}
//===----------------------------------------------------------------------===//
-// FoldingSetImpl Implementation
+// FoldingSetBase Implementation
-void FoldingSetImpl::anchor() {}
+void FoldingSetBase::anchor() {}
-FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
+FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) {
assert(5 < Log2InitSize && Log2InitSize < 32 &&
"Initial hash table size out of range");
NumBuckets = 1 << Log2InitSize;
@@ -233,14 +233,14 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
NumNodes = 0;
}
-FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg)
+FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg)
: Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
Arg.Buckets = nullptr;
Arg.NumBuckets = 0;
Arg.NumNodes = 0;
}
-FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) {
+FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) {
free(Buckets); // This may be null if the set is in a moved-from state.
Buckets = RHS.Buckets;
NumBuckets = RHS.NumBuckets;
@@ -251,11 +251,11 @@ FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) {
return *this;
}
-FoldingSetImpl::~FoldingSetImpl() {
+FoldingSetBase::~FoldingSetBase() {
free(Buckets);
}
-void FoldingSetImpl::clear() {
+void FoldingSetBase::clear() {
// Set all but the last bucket to null pointers.
memset(Buckets, 0, NumBuckets*sizeof(void*));
@@ -266,7 +266,7 @@ void FoldingSetImpl::clear() {
NumNodes = 0;
}
-void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) {
+void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
assert((NewBucketCount > NumBuckets) && "Can't shrink a folding set with GrowBucketCount");
assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
void **OldBuckets = Buckets;
@@ -300,11 +300,11 @@ void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) {
/// GrowHashTable - Double the size of the hash table and rehash everything.
///
-void FoldingSetImpl::GrowHashTable() {
+void FoldingSetBase::GrowHashTable() {
GrowBucketCount(NumBuckets * 2);
}
-void FoldingSetImpl::reserve(unsigned EltCount) {
+void FoldingSetBase::reserve(unsigned EltCount) {
// This will give us somewhere between EltCount / 2 and
// EltCount buckets. This puts us in the load factor
// range of 1.0 - 2.0.
@@ -316,9 +316,9 @@ void FoldingSetImpl::reserve(unsigned EltCount) {
/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
/// return it. If not, return the insertion token that will make insertion
/// faster.
-FoldingSetImpl::Node
-*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
- void *&InsertPos) {
+FoldingSetBase::Node *
+FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
unsigned IDHash = ID.ComputeHash();
void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
void *Probe = *Bucket;
@@ -342,7 +342,7 @@ FoldingSetImpl::Node
/// InsertNode - Insert the specified node into the folding set, knowing that it
/// is not already in the map. InsertPos must be obtained from
/// FindNodeOrInsertPos.
-void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
+void FoldingSetBase::InsertNode(Node *N, void *InsertPos) {
assert(!N->getNextInBucket());
// Do we need to grow the hashtable?
if (NumNodes+1 > capacity()) {
@@ -371,7 +371,7 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
/// RemoveNode - Remove a node from the folding set, returning true if one was
/// removed or false if the node was not in the folding set.
-bool FoldingSetImpl::RemoveNode(Node *N) {
+bool FoldingSetBase::RemoveNode(Node *N) {
// Because each bucket is a circular list, we don't need to compute N's hash
// to remove it.
void *Ptr = N->getNextInBucket();
@@ -412,7 +412,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) {
/// GetOrInsertNode - If there is an existing simple Node exactly
/// equal to the specified node, return it. Otherwise, insert 'N' and
/// return it instead.
-FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
+FoldingSetBase::Node *FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N) {
FoldingSetNodeID ID;
GetNodeProfile(N, ID);
void *IP;
diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp
index db03a4d6240d..22b7550d4971 100644
--- a/lib/Support/ThreadPool.cpp
+++ b/lib/Support/ThreadPool.cpp
@@ -53,11 +53,7 @@ ThreadPool::ThreadPool(unsigned ThreadCount)
Tasks.pop();
}
// Run the task we just grabbed
-#ifndef _MSC_VER
Task();
-#else
- Task(/* unused */ false);
-#endif
{
// Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
@@ -82,7 +78,7 @@ void ThreadPool::wait() {
[&] { return !ActiveThreads && Tasks.empty(); });
}
-std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
/// Wrap the Task in a packaged_task to return a future object.
PackagedTaskTy PackagedTask(std::move(Task));
auto Future = PackagedTask.get_future();
@@ -128,25 +124,16 @@ void ThreadPool::wait() {
while (!Tasks.empty()) {
auto Task = std::move(Tasks.front());
Tasks.pop();
-#ifndef _MSC_VER
- Task();
-#else
- Task(/* unused */ false);
-#endif
+ Task();
}
}
-std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
-#ifndef _MSC_VER
+std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
// Get a Future with launch::deferred execution using std::async
auto Future = std::async(std::launch::deferred, std::move(Task)).share();
// Wrap the future so that both ThreadPool::wait() can operate and the
// returned future can be sync'ed on.
PackagedTaskTy PackagedTask([Future]() { Future.get(); });
-#else
- auto Future = std::async(std::launch::deferred, std::move(Task), false).share();
- PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; });
-#endif
Tasks.push(std::move(PackagedTask));
return Future;
}
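
Since the MSVC-only VoidTy workaround is gone, the future type is now uniform across host compilers; a usage sketch (the caller is hypothetical):

#include "llvm/Support/ThreadPool.h"

// Hypothetical caller, not part of this patch: async() now yields a
// plain std::shared_future<void> on every host.
void runOne(llvm::ThreadPool &Pool) {
  std::shared_future<void> F = Pool.async([] { /* work */ });
  F.wait();    // block on this task only
  Pool.wait(); // or drain the whole pool
}
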
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 7d3537e20727..2df0eaff47e5 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -163,16 +163,6 @@ static void SetMemoryLimits (unsigned size)
r.rlim_cur = limit;
setrlimit (RLIMIT_RSS, &r);
#endif
-#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it.
- // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb
- // of virtual memory for shadow memory mapping.
-#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD
- // Virtual memory.
- getrlimit (RLIMIT_AS, &r);
- r.rlim_cur = limit;
- setrlimit (RLIMIT_AS, &r);
-#endif
-#endif
#endif
}
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 83f7147dc9f6..b2636e1e6cb4 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -11,20 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <cassert>
#include <cstdint>
-#include <new>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -162,7 +170,8 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
// Initializer implementations
//===----------------------------------------------------------------------===//
-void Init::anchor() { }
+void Init::anchor() {}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
#endif
@@ -301,7 +310,6 @@ static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) {
// resolveReferences - If there are any field references that refer to fields
// that have been filled in, we can propagate the values now.
-//
Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
bool Changed = false;
SmallVector<Init *, 16> NewBits(getNumBits());
@@ -615,7 +623,7 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const {
Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- case CAST: {
+ case CAST:
if (isa<StringRecTy>(getType())) {
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return LHSs;
@@ -680,15 +688,15 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
}
break;
- }
- case HEAD: {
+
+ case HEAD:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
assert(!LHSl->empty() && "Empty list in head");
return LHSl->getElement(0);
}
break;
- }
- case TAIL: {
+
+ case TAIL:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
assert(!LHSl->empty() && "Empty list in tail");
// Note the +1. We can't just pass the result of getValues()
@@ -696,16 +704,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
return ListInit::get(LHSl->getValues().slice(1), LHSl->getType());
}
break;
- }
- case EMPTY: {
+
+ case EMPTY:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS))
return IntInit::get(LHSl->empty());
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return IntInit::get(LHSs->getValue().empty());
-
break;
}
- }
return const_cast<UnOpInit *>(this);
}
@@ -948,7 +954,6 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Record *CurRec, MultiClass *CurMultiClass) {
-
OpInit *RHSo = dyn_cast<OpInit>(RHS);
if (!RHSo)
@@ -1245,7 +1250,7 @@ VarInit *VarInit::get(StringRef VN, RecTy *T) {
}
VarInit *VarInit::get(Init *VN, RecTy *T) {
- typedef std::pair<RecTy *, Init *> Key;
+ using Key = std::pair<RecTy *, Init *>;
static DenseMap<Key, VarInit*> ThePool;
Key TheKey(std::make_pair(T, VN));
@@ -1320,7 +1325,7 @@ Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
}
VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
- typedef std::pair<TypedInit *, unsigned> Key;
+ using Key = std::pair<TypedInit *, unsigned>;
static DenseMap<Key, VarBitInit*> ThePool;
Key TheKey(std::make_pair(T, B));
@@ -1352,7 +1357,7 @@ Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const {
VarListElementInit *VarListElementInit::get(TypedInit *T,
unsigned E) {
- typedef std::pair<TypedInit *, unsigned> Key;
+ using Key = std::pair<TypedInit *, unsigned>;
static DenseMap<Key, VarListElementInit*> ThePool;
Key TheKey(std::make_pair(T, E));
@@ -1422,7 +1427,7 @@ std::string DefInit::getAsString() const {
}
FieldInit *FieldInit::get(Init *R, StringInit *FN) {
- typedef std::pair<Init *, StringInit *> Key;
+ using Key = std::pair<Init *, StringInit *>;
static DenseMap<Key, FieldInit*> ThePool;
Key TheKey(std::make_pair(R, FN));
diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp
index a4d33051b4f7..733e0aeef623 100644
--- a/lib/TableGen/SetTheory.cpp
+++ b/lib/TableGen/SetTheory.cpp
@@ -12,18 +12,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/TableGen/SetTheory.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/SetTheory.h"
+#include <algorithm>
+#include <cstdint>
+#include <string>
+#include <utility>
using namespace llvm;
// Define the standard operators.
namespace {
-typedef SetTheory::RecSet RecSet;
-typedef SetTheory::RecVec RecVec;
+using RecSet = SetTheory::RecSet;
+using RecVec = SetTheory::RecVec;
// (add a, b, ...) Evaluate and union all arguments.
struct AddOp : public SetTheory::Operator {
@@ -237,13 +248,13 @@ struct FieldExpander : public SetTheory::Expander {
ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc());
}
};
+
} // end anonymous namespace
// Pin the vtables to this file.
void SetTheory::Operator::anchor() {}
void SetTheory::Expander::anchor() {}
-
SetTheory::SetTheory() {
addOperator("add", llvm::make_unique<AddOp>());
addOperator("sub", llvm::make_unique<SubOp>());
@@ -321,4 +332,3 @@ const RecVec *SetTheory::expand(Record *Set) {
// Set is not expandable.
return nullptr;
}
-
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index abe28460c83a..53eef79c4df3 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -362,6 +362,7 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index e8fcf1a0e9b7..7bf2097c17ce 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1282,6 +1282,10 @@ unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
bool WantResult) {
assert(LHSReg && RHSReg && "Invalid register number.");
+ if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
+ RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
+ return 0;
+
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
@@ -1362,6 +1366,8 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
uint64_t ShiftImm, bool SetFlags,
bool WantResult) {
assert(LHSReg && RHSReg && "Invalid register number.");
+ assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
+ RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
@@ -1403,6 +1409,8 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
uint64_t ShiftImm, bool SetFlags,
bool WantResult) {
assert(LHSReg && RHSReg && "Invalid register number.");
+ assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
+ RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 059556a560c0..083ca2156598 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9366,7 +9366,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
StoreSDNode *S = cast<StoreSDNode>(N);
- if (S->isVolatile())
+ if (S->isVolatile() || S->isIndexed())
return SDValue();
SDValue StVal = S->getValue();
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index 7402bcf1346c..3d737402022d 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -160,6 +160,21 @@ def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
let NumMicroOps = 2;
}
+def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 14;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+}
+
def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -195,10 +210,10 @@ def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
let ResourceCycles = [2, 8];
}
-def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
- let Latency = 16;
+def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [2, 16];
+ let ResourceCycles = [2, 11];
}
def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
@@ -289,9 +304,27 @@ def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, Fa
//===----------------------------------------------------------------------===//
// Define 4 micro-op types
-def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
- FalkorUnitVX, FalkorUnitVY]> {
- let Latency = 2;
+def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 21;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 24;
let NumMicroOps = 4;
}
@@ -575,7 +608,8 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i
def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)v2f32$")>;
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>;
+def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
@@ -592,7 +626,10 @@ def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
-def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32)$")>;
+def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>;
+def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>;
+def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>;
+def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>;
def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
(instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
@@ -1039,8 +1076,10 @@ def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
(instregex "^F(N)?MULDrr$")>;
-def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(S|D)rr$")>;
-def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(S|D)r$")>;
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>;
+def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>;
+def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>;
+def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>;
def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
(instregex "^F(N)?M(ADD|SUB)Srrr$")>;
@@ -1112,7 +1151,7 @@ def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64
(instregex "^M(ADD|SUB)Xrrr$")>;
def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
-def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
+def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>;
def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
(instregex "^(S|U)MULLv.*$")>;
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index d3cab1ad3397..a9a9d5ce8429 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -92,6 +92,10 @@ void AArch64Subtarget::initializeProperties() {
MaxInterleaveFactor = 4;
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
+ CacheLineSize = 128;
+ PrefetchDistance = 820;
+ MinPrefetchStride = 2048;
+ MaxPrefetchIterationsAhead = 8;
break;
case Kryo:
MaxInterleaveFactor = 4;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index d0299149c38c..290a1ca1f24b 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -78,7 +78,7 @@ public:
return 31;
}
- unsigned getRegisterBitWidth(bool Vector) {
+ unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasNEON())
return 128;
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 8084d368c80f..6f3742ed039b 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -9,7 +9,7 @@
//
/// \file
/// This pass marks all internal functions as always_inline and creates
-/// duplicates of all other functions a marks the duplicates as always_inline.
+/// duplicates of all other functions and marks the duplicates as always_inline.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 3c788fa1dcea..6f002860044c 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -107,7 +107,7 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
DFS(Start, Checklist);
for (auto &BB : Checklist) {
- BasicBlock::iterator StartIt = (BB == Load->getParent()) ?
+ BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
BasicBlock::iterator(Load) : BB->end();
if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
true, StartIt, BB, Load).isClobber())
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 251c2f9bb25a..f235313e4853 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -138,7 +138,10 @@ private:
bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
SDValue &ImmOffset, SDValue &VOffset) const;
- bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const;
+ bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
+ bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -1313,14 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
- SDValue &VAddr,
- SDValue &SLC) const {
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ int64_t OffsetVal = 0;
+
+ if (Subtarget->hasFlatInstOffsets() &&
+ CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ if (isUInt<12>(COffsetVal)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ }
+ }
+
VAddr = Addr;
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+
return true;
}
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ return SelectFlatOffset(Addr, VAddr, Offset, SLC);
+}
+
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a7eac080f885..e54c887d6090 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -126,8 +126,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
.add(I.getOperand(1))
.add(I.getOperand(0))
- .addImm(0)
- .addImm(0);
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addImm(0); // slc
// Now that we selected an opcode, we need to constrain the register
@@ -392,8 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
.add(I.getOperand(0))
.addReg(PtrReg)
- .addImm(0)
- .addImm(0);
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addImm(0); // slc
bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
I.eraseFromParent();
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b889788c3426..790a69b84397 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -34,6 +34,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
const LLT P1 = LLT::pointer(1, 64);
const LLT P2 = LLT::pointer(2, 64);
+ setAction({G_ADD, S32}, Legal);
+
// FIXME: i1 operands to intrinsics should always be legal, but other i1
// values may not be legal. We need to figure out how to distinguish
// between these two scenarios.
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index dee3d2856701..0d6689bd04c4 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -195,7 +195,7 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
-unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const {
return Vector ? 0 : 32;
}
@@ -489,6 +489,19 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
return false;
}
+bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const {
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane:
+ return true;
+ }
+ }
+ return false;
+}
+
unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (ST->hasVOP3PInsts()) {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index e0024e21e82b..a60b1bb1b59c 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -76,7 +76,7 @@ public:
}
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
@@ -103,6 +103,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
+ bool isAlwaysUniform(const Value *V) const;
unsigned getFlatAddressSpace() const {
// Don't bother running InferAddressSpaces pass on graphics shaders which
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 16e3b7b4ebee..392e9d89bd9b 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -285,6 +285,9 @@ public:
bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
+
+ bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
+ bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
@@ -886,6 +889,10 @@ public:
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
+ bool hasFlatOffsets() const {
+ return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
+ }
+
bool hasSGPR102_SGPR103() const {
return !isVI();
}
@@ -1034,6 +1041,7 @@ public:
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
+ AMDGPUOperand::Ptr defaultOffsetU12() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
@@ -1970,6 +1978,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
}
+ if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+ // FIXME: Produces error without correct column reported.
+ auto OpNum =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+ const auto &Op = Inst.getOperand(OpNum);
+ if (Op.getImm() != 0)
+ return Match_InvalidOperand;
+ }
+
return Match_Success;
}
@@ -3849,6 +3866,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 8ba9efd42c70..98eda288bcac 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
-def FLATAtomic : ComplexPattern<i64, 2, "SelectFlat">;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlat", [], [], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -55,6 +56,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
+ let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
// encoding fields
bits<8> vaddr;
@@ -63,10 +66,23 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<1> slc;
bits<1> glc;
+ // Only valid on gfx9
+ bits<1> lds = 0; // XXX - What does this actually do?
+ bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
+
+ // Signed offset. Highest bit ignored for flat and treated as 12-bit
+ // unsigned for flat accesses.
+ bits<13> offset;
+ bits<1> nv = 0; // XXX - What does this actually do?
+
// We don't use tfe right now, and it was removed in gfx9.
bits<1> tfe = 0;
- // 15-0 is reserved.
+ // Only valid on GFX9+
+ let Inst{12-0} = offset;
+ let Inst{13} = lds;
+ let Inst{15-14} = 0;
+
let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
let Inst{17} = slc;
let Inst{24-18} = op;
@@ -74,24 +90,30 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{39-32} = vaddr;
let Inst{47-40} = !if(ps.has_data, vdata, ?);
// 54-48 is reserved.
- let Inst{55} = tfe;
+ let Inst{55} = nv; // nv on GFX9+, TFE before.
let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
-class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo<
+class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
+ bit HasSignedOffset = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
- (ins VReg_64:$vaddr, GLC:$glc, slc:$slc),
- " $vdst, $vaddr$glc$slc"> {
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+ (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)),
+ " $vdst, $vaddr$offset$glc$slc"> {
let has_data = 0;
let mayLoad = 1;
}
-class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo<
+class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
+ bit HasSignedOffset = 0> : FLAT_Pseudo<
opName,
(outs),
- (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc),
- " $vaddr, $vdata$glc$slc"> {
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc),
+ (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)),
+ " $vaddr, $vdata$offset$glc$slc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -103,12 +125,15 @@ multiclass FLAT_Atomic_Pseudo<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit HasSignedOffset = 0> {
def "" : FLAT_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
- " $vaddr, $vdata$slc",
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
+ " $vaddr, $vdata$offset$slc",
[]>,
AtomicNoRet <NAME, 0> {
let mayLoad = 1;
@@ -121,10 +146,12 @@ multiclass FLAT_Atomic_Pseudo<
def _RTN : FLAT_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
- " $vdst, $vaddr, $vdata glc$slc",
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
+ " $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
- (atomic (FLATAtomic i64:$vaddr, i1:$slc), data_vt:$vdata))]>,
+ (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
AtomicNoRet <NAME, 1> {
let mayLoad = 1;
let mayStore = 1;
@@ -312,31 +339,31 @@ def flat_truncstorei16 : flat_st <truncstorei16>;
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 0, 0)
+ (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 0, $slc)
>;
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 1, 0)
+ (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 1, $slc)
>;
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (node vt:$data, i64:$addr),
- (inst $addr, $data, 0, 0)
+ (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
+ (inst $vaddr, $data, $offset, 0, $slc)
>;
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
// atomic store follows atomic binop convention so the address comes
// first.
- (node i64:$addr, vt:$data),
- (inst $addr, $data, 1, 0)
+ (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
+ (inst $vaddr, $data, $offset, 1, $slc)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
- (vt (node i64:$addr, data_vt:$data)),
- (inst $addr, $data, 0)
+ (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
+ (inst $vaddr, $data, $offset, $slc)
>;
let Predicates = [isCIVI] in {
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 599ee942d738..441f1ef4bd04 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -567,9 +567,17 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
- // Flat instructions do not have offsets, and only have the register
- // address.
- return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+ if (!Subtarget->hasFlatInstOffsets()) {
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && AM.Scale == 0;
+ }
+
+ // GFX9 added a 13-bit signed offset. When using regular flat instructions,
+ // the sign bit is ignored and is treated as a 12-bit unsigned offset.
+
+ // Just r + i
+ return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
}
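The new rule is straightforward to probe in isolation: before GFX9 a flat access may carry no immediate offset at all, and on GFX9 the patch conservatively accepts only the 12-bit unsigned range even though the encoding provides 13 signed bits. A minimal standalone sketch of that predicate (isLegalFlatOffset is a hypothetical stand-in, not the method above):

#include <cassert>
#include <cstdint>

// Hypothetical model of the flat-offset legality check.
static bool isLegalFlatOffset(bool HasFlatInstOffsets, int64_t BaseOffs) {
  if (!HasFlatInstOffsets)
    return BaseOffs == 0;                       // pre-GFX9: register address only
  return BaseOffs >= 0 && BaseOffs < (1 << 12); // 12-bit unsigned range
}

int main() {
  assert(!isLegalFlatOffset(false, 8));   // no offsets before GFX9
  assert(isLegalFlatOffset(true, 4095));  // fits in 12 bits
  assert(!isLegalFlatOffset(true, 4096)); // would need the ignored sign bit
  return 0;
}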
bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 58c05cf16f15..1097814e99ce 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -468,13 +468,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
- if (Idx == SubIndices.size() - 1)
- Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit);
-
if (Idx == 0)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
- Builder.addReg(SrcReg, RegState::Implicit);
+ bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
+ Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
}
}
@@ -2331,11 +2329,12 @@ static bool isSubRegOf(const SIRegisterInfo &TRI,
bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI.getOpcode();
-
if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
return true;
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+
int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
@@ -2565,6 +2564,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
+ const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
+ if (Offset->getImm() != 0) {
+ ErrInfo = "subtarget does not support offsets in flat instructions";
+ return false;
+ }
+ }
+
return true;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 445bf79a7814..470a47b02443 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -492,11 +492,21 @@ class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
let ParserMatchClass = MatchClass;
}
+class NamedOperandU12<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
}
+class NamedOperandS13<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
@@ -514,6 +524,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
+def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
+def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index a7ac9a1dca6e..e498f70b820d 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -35,9 +35,19 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- if (T->isArrayTy() || T->isStructTy())
+ if (T->isArrayTy())
return true;
+ if (T->isStructTy()) {
+ // For now we only allow homogeneous structs that we can manipulate with
+ // G_MERGE_VALUES and G_UNMERGE_VALUES
+ auto StructT = cast<StructType>(T);
+ for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i)
+ if (StructT->getElementType(i) != StructT->getElementType(0))
+ return false;
+ return true;
+ }
+
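The homogeneity test is a single pass comparing every element type against the first; only such structs can be split and rebuilt with G_UNMERGE_VALUES/G_MERGE_VALUES. A toy model of the check, with plain strings standing in for llvm::Type:

#include <cassert>
#include <string>
#include <vector>

// Model of the struct check: supported only if all element types match.
static bool isHomogeneous(const std::vector<std::string> &ElemTys) {
  for (size_t i = 1; i < ElemTys.size(); ++i)
    if (ElemTys[i] != ElemTys[0])
      return false;
  return true;
}

int main() {
  assert(isHomogeneous({"i32", "i32", "i32"})); // mergeable
  assert(!isHomogeneous({"i32", "float"}));     // rejected for now
  return 0;
}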
EVT VT = TLI.getValueType(DL, T, true);
if (!VT.isSimple() || VT.isVector() ||
!(VT.isInteger() || VT.isFloatingPoint()))
@@ -220,12 +230,16 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
return false;
SmallVector<ArgInfo, 4> SplitVTs;
+ SmallVector<unsigned, 4> Regs;
ArgInfo RetInfo(VReg, Val->getType());
setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, VReg, Offset);
+ Regs.push_back(Reg);
});
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, VReg);
+
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@@ -344,26 +358,6 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
return 1;
}
- /// Merge the values in \p SrcRegs into \p DstReg at offsets \p SrcOffsets.
- /// Note that the source registers are not required to have homogeneous types,
- /// so we use G_INSERT rather than G_MERGE_VALUES.
- // FIXME: Use G_MERGE_VALUES if the types are homogeneous.
- void mergeRegisters(unsigned DstReg, ArrayRef<unsigned> SrcRegs,
- ArrayRef<uint64_t> SrcOffsets) {
- LLT Ty = MRI.getType(DstReg);
-
- unsigned Dst = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildUndef(Dst);
-
- for (unsigned i = 0; i < SrcRegs.size(); ++i) {
- unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInsert(Tmp, Dst, SrcRegs[i], SrcOffsets[i]);
- Dst = Tmp;
- }
-
- MIRBuilder.buildCopy(DstReg, Dst);
- }
-
/// Marking a physical register as used is different between formal
/// parameters, where it's a basic block live-in, and call returns, where it's
/// an implicit-def of the call instruction.
@@ -413,22 +407,19 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> ArgInfos;
SmallVector<unsigned, 4> SplitRegs;
- SmallVector<uint64_t, 4> RegOffsets;
unsigned Idx = 0;
for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[Idx], Arg.getType());
setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
SplitRegs.clear();
- RegOffsets.clear();
splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
SplitRegs.push_back(Reg);
- RegOffsets.push_back(Offset);
});
if (!SplitRegs.empty())
- ArgHandler.mergeRegisters(VRegs[Idx], SplitRegs, RegOffsets);
+ MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
Idx++;
}
@@ -490,9 +481,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!Arg.IsFixed)
return false;
+ SmallVector<unsigned, 8> Regs;
splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, Arg.Reg, Offset);
+ Regs.push_back(Reg);
});
+
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, Arg.Reg);
}
auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
@@ -508,11 +503,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
ArgInfos.clear();
- SmallVector<uint64_t, 8> RegOffsets;
SmallVector<unsigned, 8> SplitRegs;
splitToValueTypes(OrigRet, ArgInfos, MF,
[&](unsigned Reg, uint64_t Offset) {
- RegOffsets.push_back(Offset);
SplitRegs.push_back(Reg);
});
@@ -521,10 +514,10 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
- if (!RegOffsets.empty()) {
+ if (!SplitRegs.empty()) {
// We have split the value and allocated each individual piece, now build
// it up again.
- RetHandler.mergeRegisters(OrigRet.Reg, SplitRegs, RegOffsets);
+ MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs);
}
}
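The unmerge/merge pair is a lossless round trip: the wide value is cut into register-sized pieces for assignment and reassembled afterwards. A sketch of that invariant for a 64-bit value split into 32-bit halves (helper names are illustrative, not MachineIRBuilder calls):

#include <cassert>
#include <cstdint>
#include <vector>

// Stand-ins for buildUnmerge/buildMerge on a 64-bit value.
static std::vector<uint32_t> unmerge64(uint64_t V) {
  return {uint32_t(V), uint32_t(V >> 32)};
}
static uint64_t merge64(const std::vector<uint32_t> &Parts) {
  return uint64_t(Parts[0]) | (uint64_t(Parts[1]) << 32);
}

int main() {
  uint64_t V = 0x0123456789ABCDEFULL;
  assert(merge64(unmerge64(V)) == V); // the pieces rebuild the original
  return 0;
}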
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 817b567db767..5d887c4fcbf2 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -2010,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -2018,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -2028,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
@@ -2059,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -2076,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 2d490b7c303e..a706079d9866 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "ARMLegalizerInfo.h"
+#include "ARMCallLowering.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -63,6 +65,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
+ // FIXME: Support s8 and s16 as well
+ for (unsigned Op : {G_SREM, G_UREM})
+ if (ST.hasDivideInARMMode())
+ setAction({Op, s32}, Lower);
+ else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() ||
+ ST.isTargetMuslAEABI())
+ setAction({Op, s32}, Custom);
+ else
+ setAction({Op, s32}, Libcall);
+
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
for (auto Ty : {s1, s8, s16})
@@ -134,5 +146,38 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
}
return true;
}
+ case G_SREM:
+ case G_UREM: {
+ unsigned OriginalResult = MI.getOperand(0).getReg();
+ auto Size = MRI.getType(OriginalResult).getSizeInBits();
+ if (Size != 32)
+ return false;
+
+ auto Libcall =
+ MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+
+ // Our divmod libcalls return a struct containing the quotient and the
+ // remainder. We need to create a virtual register for it.
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ Type *ArgTy = Type::getInt32Ty(Ctx);
+ StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
+ auto RetVal = MRI.createGenericVirtualRegister(
+ getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
+
+ auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy},
+ {{MI.getOperand(1).getReg(), ArgTy},
+ {MI.getOperand(2).getReg(), ArgTy}});
+ if (Status != LegalizerHelper::Legalized)
+ return false;
+
+ // The remainder is the second result of divmod. Split the return value into
+ // a new, unused register for the quotient and the destination of the
+ // original instruction for the remainder.
+ MIRBuilder.buildUnmerge(
+ {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
+ RetVal);
+
+ return LegalizerHelper::Legalized;
+ }
}
}
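The shape of the lowering mirrors what std::div does in C: one call produces both quotient and remainder, and the legalizer simply discards the quotient half of the pair. A sketch under that analogy (the real libcall is selected via RTLIB::SDIVREM_I32/UDIVREM_I32):

#include <cassert>
#include <cstdlib>

// One divmod call yields {quot, rem}; only the remainder is kept, matching
// the unused scratch register created for the quotient above.
static int remViaDivmod(int N, int D) {
  std::div_t QR = std::div(N, D);
  (void)QR.quot; // goes to the unused virtual register
  return QR.rem;
}

int main() {
  assert(remViaDivmod(7, 3) == 1);
  assert(remViaDivmod(-7, 3) == -1); // signed remainder follows the dividend
  return 0;
}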
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 7de0543dfa5e..8a1a37863877 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -78,7 +78,7 @@ public:
return 13;
}
- unsigned getRegisterBitWidth(bool Vector) {
+ unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasNEON())
return 128;
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index fcd903b7a4a8..9397c78f3dff 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -35,14 +35,15 @@ using namespace llvm;
namespace {
class BPFAsmPrinter : public AsmPrinter {
public:
- explicit BPFAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ explicit BPFAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
StringRef getPassName() const override { return "BPF Assembly Printer"; }
void EmitInstruction(const MachineInstr *MI) override;
};
-}
+} // namespace
void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 279cdb1a89b4..7d5fb6ca17b9 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -22,11 +22,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+
using namespace llvm;
#define DEBUG_TYPE "bpf-isel"
@@ -42,6 +45,8 @@ public:
return "BPF DAG->DAG Pattern Instruction Selection";
}
+ void PreprocessISelDAG() override;
+
private:
// Include the pieces autogenerated from the target description.
#include "BPFGenDAGISel.inc"
@@ -51,15 +56,31 @@ private:
// Complex Pattern for address selection.
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Find constants from a constant structure
+ typedef std::vector<unsigned char> val_vec_type;
+ bool fillGenericConstant(const DataLayout &DL, const Constant *CV,
+ val_vec_type &Vals, uint64_t Offset);
+ bool fillConstantDataArray(const DataLayout &DL, const ConstantDataArray *CDA,
+ val_vec_type &Vals, int Offset);
+ bool fillConstantArray(const DataLayout &DL, const ConstantArray *CA,
+ val_vec_type &Vals, int Offset);
+ bool fillConstantStruct(const DataLayout &DL, const ConstantStruct *CS,
+ val_vec_type &Vals, int Offset);
+ bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset,
+ uint64_t Size, unsigned char *ByteSeq);
+
+ // Mapping from ConstantStruct global value to corresponding byte-list values
+ std::map<const void *, val_vec_type> cs_vals_;
};
-}
+} // namespace
// ComplexPattern used on BPF Load/Store instructions
bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// if Address is FI, get the TargetFrameIndex.
SDLoc DL(Addr);
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
return true;
}
@@ -85,13 +106,14 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
}
- Base = Addr;
+ Base = Addr;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
return true;
}
// ComplexPattern used on BPF FI instruction
-bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
SDLoc DL(Addr);
if (!CurDAG->isBaseWithConstantOffset(Addr))
@@ -102,8 +124,7 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset)
if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN =
- dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
else
return false;
@@ -129,7 +150,8 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
// tablegen selection should be handled here.
switch (Opcode) {
- default: break;
+ default:
+ break;
case ISD::SDIV: {
DebugLoc Empty;
const DebugLoc &DL = Node->getDebugLoc();
@@ -181,6 +203,210 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
+void BPFDAGToDAGISel::PreprocessISelDAG() {
+  // Iterate through all nodes; we are only interested in loads from a
+  // ConstantStruct. A ConstantArray should already have been converted
+  // during IR->DAG processing.
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end();
+ I != E;) {
+ SDNode *Node = &*I++;
+ unsigned Opcode = Node->getOpcode();
+ if (Opcode != ISD::LOAD)
+ continue;
+
+    unsigned char new_val[8]; // holds the constant value that replaces the load.
+ bool to_replace = false;
+ SDLoc DL(Node);
+ const LoadSDNode *LD = cast<LoadSDNode>(Node);
+ uint64_t size = LD->getMemOperand()->getSize();
+ if (!size || size > 8 || (size & (size - 1)))
+ continue;
+
+ SDNode *LDAddrNode = LD->getOperand(1).getNode();
+ // Match LDAddr against either global_addr or (global_addr + offset)
+ unsigned opcode = LDAddrNode->getOpcode();
+ if (opcode == ISD::ADD) {
+ SDValue OP1 = LDAddrNode->getOperand(0);
+ SDValue OP2 = LDAddrNode->getOperand(1);
+
+ // We want to find the pattern global_addr + offset
+ SDNode *OP1N = OP1.getNode();
+ if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END ||
+ OP1N->getNumOperands() == 0)
+ continue;
+
+ DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+ const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode());
+ const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
+ if (GADN && CDN)
+ to_replace =
+ getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val);
+ } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
+ LDAddrNode->getNumOperands() > 0) {
+ DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+ SDValue OP1 = LDAddrNode->getOperand(0);
+ if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
+ to_replace = getConstantFieldValue(GADN, 0, size, new_val);
+ }
+
+ if (!to_replace)
+ continue;
+
+ // replacing the old with a new value
+ uint64_t val;
+ if (size == 1)
+ val = *(uint8_t *)new_val;
+ else if (size == 2)
+ val = *(uint16_t *)new_val;
+ else if (size == 4)
+ val = *(uint32_t *)new_val;
+ else {
+ val = *(uint64_t *)new_val;
+ }
+
+ DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
+ << val << '\n');
+ SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+
+    // After replacement, the current node is dead. Step the iterator back
+    // one node so it stays valid while the uses are updated below.
+    I--;
+ SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)};
+ SDValue To[] = {NVal, NVal};
+ CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
+ I++;
+    // It is now safe to delete the dead node
+ CurDAG->DeleteNode(Node);
+ }
+}
+
+bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node,
+ uint64_t Offset, uint64_t Size,
+ unsigned char *ByteSeq) {
+ const GlobalVariable *V = dyn_cast<GlobalVariable>(Node->getGlobal());
+
+ if (!V || !V->hasInitializer())
+ return false;
+
+ const Constant *Init = V->getInitializer();
+ const DataLayout &DL = CurDAG->getDataLayout();
+ val_vec_type TmpVal;
+
+ auto it = cs_vals_.find(static_cast<const void *>(Init));
+ if (it != cs_vals_.end()) {
+ TmpVal = it->second;
+ } else {
+ uint64_t total_size = 0;
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(Init))
+ total_size =
+ DL.getStructLayout(cast<StructType>(CS->getType()))->getSizeInBytes();
+ else if (const ConstantArray *CA = dyn_cast<ConstantArray>(Init))
+ total_size = DL.getTypeAllocSize(CA->getType()->getElementType()) *
+ CA->getNumOperands();
+ else
+ return false;
+
+ val_vec_type Vals(total_size, 0);
+    if (!fillGenericConstant(DL, Init, Vals, 0))
+ return false;
+ cs_vals_[static_cast<const void *>(Init)] = Vals;
+ TmpVal = std::move(Vals);
+ }
+
+ // test whether host endianness matches target
+ uint8_t test_buf[2];
+ uint16_t test_val = 0x2345;
+ if (DL.isLittleEndian())
+ support::endian::write16le(test_buf, test_val);
+ else
+ support::endian::write16be(test_buf, test_val);
+
+ bool endian_match = *(uint16_t *)test_buf == test_val;
+ for (uint64_t i = Offset, j = 0; i < Offset + Size; i++, j++)
+ ByteSeq[j] = endian_match ? TmpVal[i] : TmpVal[Offset + Size - 1 - j];
+
+ return true;
+}
+
+bool BPFDAGToDAGISel::fillGenericConstant(const DataLayout &DL,
+ const Constant *CV,
+ val_vec_type &Vals, uint64_t Offset) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
+
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
+ return true; // already done
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint64_t val = CI->getZExtValue();
+ DEBUG(dbgs() << "Byte array at offset " << Offset << " with value " << val
+ << '\n');
+
+ if (Size > 8 || (Size & (Size - 1)))
+ return false;
+
+ // Store based on target endian
+ for (uint64_t i = 0; i < Size; ++i) {
+ Vals[Offset + i] = DL.isLittleEndian()
+ ? ((val >> (i * 8)) & 0xFF)
+ : ((val >> ((Size - i - 1) * 8)) & 0xFF);
+ }
+ return true;
+ }
+
+ if (const ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(CV))
+ return fillConstantDataArray(DL, CDA, Vals, Offset);
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV))
+ return fillConstantArray(DL, CA, Vals, Offset);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return fillConstantStruct(DL, CVS, Vals, Offset);
+
+ return false;
+}
+
+bool BPFDAGToDAGISel::fillConstantDataArray(const DataLayout &DL,
+ const ConstantDataArray *CDA,
+ val_vec_type &Vals, int Offset) {
+ for (unsigned i = 0, e = CDA->getNumElements(); i != e; ++i) {
+    if (!fillGenericConstant(DL, CDA->getElementAsConstant(i), Vals, Offset))
+      return false;
+ Offset += DL.getTypeAllocSize(CDA->getElementAsConstant(i)->getType());
+ }
+
+ return true;
+}
+
+bool BPFDAGToDAGISel::fillConstantArray(const DataLayout &DL,
+ const ConstantArray *CA,
+ val_vec_type &Vals, int Offset) {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+    if (!fillGenericConstant(DL, CA->getOperand(i), Vals, Offset))
+ return false;
+ Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+ }
+
+ return true;
+}
+
+bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL,
+ const ConstantStruct *CS,
+ val_vec_type &Vals, int Offset) {
+ const StructLayout *Layout = DL.getStructLayout(CS->getType());
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+ uint64_t SizeSoFar = Layout->getElementOffset(i);
+    if (!fillGenericConstant(DL, Field, Vals, Offset + SizeSoFar))
+ return false;
+ }
+ return true;
+}
+
FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
return new BPFDAGToDAGISel(TM);
}
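The byte-sequence handling above boils down to one rule: copy Size bytes starting at Offset, reversing them when host and target endianness disagree (detected by the 0x2345 write16le/write16be probe). A self-contained sketch of that extraction, with readField as a hypothetical helper:

#include <cassert>
#include <cstdint>
#include <vector>

// Extract a field of Size bytes at Offset, byte-swapping when the host and
// target byte orders differ.
static uint64_t readField(const std::vector<uint8_t> &Vals, uint64_t Offset,
                          uint64_t Size, bool EndianMatch) {
  uint64_t V = 0;
  for (uint64_t j = 0; j < Size; ++j) {
    uint8_t B = EndianMatch ? Vals[Offset + j] : Vals[Offset + Size - 1 - j];
    V |= uint64_t(B) << (8 * j);
  }
  return V;
}

int main() {
  std::vector<uint8_t> Bytes = {0x78, 0x56, 0x34, 0x12}; // LE 0x12345678
  assert(readField(Bytes, 0, 4, /*EndianMatch=*/true) == 0x12345678);
  assert(readField(Bytes, 0, 4, /*EndianMatch=*/false) == 0x78563412);
  return 0;
}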
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index c6c0ff587c6b..5ad777268208 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -51,7 +51,7 @@ def u64imm : Operand<i64> {
let PrintMethod = "printImm64Operand";
}
-def i64immSExt32 : PatLeaf<(imm),
+def i64immSExt32 : PatLeaf<(i64 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
// Addressing modes.
@@ -67,17 +67,17 @@ def MEMri : Operand<i64> {
}
// Conditional code predicates - used for pattern matching for jump instructions
-def BPF_CC_EQ : PatLeaf<(imm),
+def BPF_CC_EQ : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETEQ);}]>;
-def BPF_CC_NE : PatLeaf<(imm),
+def BPF_CC_NE : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETNE);}]>;
-def BPF_CC_GE : PatLeaf<(imm),
+def BPF_CC_GE : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETGE);}]>;
-def BPF_CC_GT : PatLeaf<(imm),
+def BPF_CC_GT : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETGT);}]>;
-def BPF_CC_GTU : PatLeaf<(imm),
+def BPF_CC_GTU : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETUGT);}]>;
-def BPF_CC_GEU : PatLeaf<(imm),
+def BPF_CC_GEU : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETUGE);}]>;
// jump instructions
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index 3c37d9ebb0eb..11ac5454f604 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -59,9 +59,7 @@ namespace {
public:
static char ID;
- HexagonGenMux() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) {
- initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry());
- }
+ HexagonGenMux() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon generate mux instructions";
@@ -79,8 +77,8 @@ namespace {
}
private:
- const HexagonInstrInfo *HII;
- const HexagonRegisterInfo *HRI;
+ const HexagonInstrInfo *HII = nullptr;
+ const HexagonRegisterInfo *HRI = nullptr;
struct CondsetInfo {
unsigned PredR = 0;
@@ -134,7 +132,7 @@ namespace {
} // end anonymous namespace
-INITIALIZE_PASS(HexagonGenMux, "hexagon-mux",
+INITIALIZE_PASS(HexagonGenMux, "hexagon-gen-mux",
"Hexagon generate mux instructions", false, false)
void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
@@ -297,12 +295,15 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
bool Failure = false, CanUp = true, CanDown = true;
+ bool Used1 = false, Used2 = false;
for (unsigned X = MinX+1; X < MaxX; X++) {
const DefUseInfo &DU = DUM.lookup(X);
if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
Failure = true;
break;
}
+ Used1 |= DU.Uses[SR1];
+ Used2 |= DU.Uses[SR2];
if (CanDown && DU.Defs[SR1])
CanDown = false;
if (CanUp && DU.Defs[SR2])
@@ -316,6 +317,45 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
// Prefer "down", since this will move the MUX farther away from the
// predicate definition.
MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+ if (CanDown) {
+ // If the MUX is placed "down", we need to make sure that there aren't
+ // any kills of the source registers between the two defs.
+ if (Used1 || Used2) {
+ auto ResetKill = [this] (unsigned Reg, MachineInstr &MI) -> bool {
+ if (MachineOperand *Op = MI.findRegisterUseOperand(Reg, true, HRI)) {
+ Op->setIsKill(false);
+ return true;
+ }
+ return false;
+ };
+ bool KilledSR1 = false, KilledSR2 = false;
+ for (MachineInstr &MJ : make_range(std::next(It1), It2)) {
+ if (SR1)
+ KilledSR1 |= ResetKill(SR1, MJ);
+ if (SR2)
+          KilledSR2 |= ResetKill(SR2, MJ);
+ }
+ // If any of the source registers were killed in this range, transfer
+      // the kills to the source operands: they will be "moved" to the
+ // resulting MUX and their parent instructions will be deleted.
+ if (KilledSR1) {
+ assert(Src1->isReg());
+ Src1->setIsKill(true);
+ }
+ if (KilledSR2) {
+ assert(Src2->isReg());
+ Src2->setIsKill(true);
+ }
+ }
+ } else {
+ // If the MUX is placed "up", it shouldn't kill any source registers
+ // that are still used afterwards. We can reset the kill flags directly
+ // on the operands, because the source instructions will be erased.
+ if (Used1 && Src1->isReg())
+ Src1->setIsKill(false);
+ if (Used2 && Src2->isReg())
+ Src2->setIsKill(false);
+ }
ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
}
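The kill-flag bookkeeping follows one invariant: a register may have at most one kill along a path, so when the MUX moves past intervening uses, their kill flags are cleared and the MUX's own source operand becomes the killer. A toy model of that transfer (types and names are illustrative):

#include <cassert>
#include <vector>

struct Use { unsigned Reg; bool IsKill; };

// Clear any kill of Reg among the intervening uses; report whether one
// existed so the caller can set the kill on the final MUX operand instead.
static bool transferKill(std::vector<Use> &Between, unsigned Reg) {
  bool WasKilled = false;
  for (Use &U : Between)
    if (U.Reg == Reg && U.IsKill) {
      U.IsKill = false;
      WasKilled = true;
    }
  return WasKilled;
}

int main() {
  std::vector<Use> Between = {{1, true}, {2, false}};
  assert(transferKill(Between, 1) && !Between[0].IsKill);
  assert(!transferKill(Between, 2)); // plain use, nothing to move
  return 0;
}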
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e4434136bf86..e5f49ca77a91 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -124,6 +124,7 @@ private:
bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);
bool isOrEquivalentToAdd(const SDNode *N) const;
bool isAlignedMemNode(const MemSDNode *N) const;
+ bool isSmallStackStore(const StoreSDNode *N) const;
bool isPositiveHalfWord(const SDNode *N) const;
// DAG preprocessing functions.
@@ -1462,6 +1463,20 @@ bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
return N->getAlignment() >= N->getMemoryVT().getStoreSize();
}
+bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
+ unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
+ switch (N->getMemoryVT().getStoreSize()) {
+ case 1:
+ return StackSize <= 56; // 1*2^6 - 8
+ case 2:
+ return StackSize <= 120; // 2*2^6 - 8
+ case 4:
+ return StackSize <= 248; // 4*2^6 - 8
+ default:
+ return false;
+ }
+}
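All three thresholds follow the formula the comments spell out: a store of Size bytes is a small-stack store while the estimated frame fits in Size*2^6 - 8 bytes, i.e. the scaled 6-bit offset range minus one slot of slack. A sketch of that pattern, assuming the constants really are Size*64 - 8:

#include <cassert>

static bool smallStackStore(unsigned StoreSize, unsigned StackSize) {
  if (StoreSize != 1 && StoreSize != 2 && StoreSize != 4)
    return false;
  return StackSize <= StoreSize * 64 - 8; // Size*2^6 - 8
}

int main() {
  assert(smallStackStore(1, 56) && !smallStackStore(1, 57));
  assert(smallStackStore(2, 120) && !smallStackStore(2, 121));
  assert(smallStackStore(4, 248) && !smallStackStore(4, 249));
  return 0;
}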
+
// Return true when the given node fits in a positive half word.
bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index b748b58bc0ae..f82ad6cb3da6 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1744,7 +1744,8 @@ bool PolynomialMultiplyRecognize::recognize() {
// wide as the target's pmpy instruction.
if (!promoteTypes(LoopB, ExitB))
return false;
- convertShiftsToLeft(LoopB, ExitB, IterCount);
+ if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
+ return false;
cleanupLoopBody(LoopB);
}
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index f269b74fc447..689419638f54 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -401,6 +401,11 @@ def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
+def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))),
+ (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
+def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))),
+ (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
+
def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)),
(M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>;
@@ -1470,16 +1475,22 @@ def i32in8ImmPred: PatLeaf<(i32 imm), [{
return v == (int64_t)(int8_t)v;
}]>;
+class SmallStackStore<PatFrag Store>
+ : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{
+ return isSmallStackStore(cast<StoreSDNode>(N));
+}]>;
let AddedComplexity = 40 in {
// Even though the offset is not extendable in the store-immediate, we
// can still generate the fi# in the base address. If the final offset
// is not valid for the instruction, we will replace it with a scratch
// register.
-// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
-// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
-// S4_storeirh_io>;
-// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;
+ def: Storexm_fi_pat <SmallStackStore<truncstorei8>, s32_0ImmPred,
+ ToImmByte, S4_storeirb_io>;
+ def: Storexm_fi_pat <SmallStackStore<truncstorei16>, i16in8ImmPred,
+ ToImmHalf, S4_storeirh_io>;
+ def: Storexm_fi_pat <SmallStackStore<store>, i32in8ImmPred,
+ ToImmWord, S4_storeiri_io>;
// defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
// S4_storeirb_io>;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index c757b6ecdd00..e507a797871f 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -111,6 +111,7 @@ namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonOptAddrModePass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
@@ -152,8 +153,11 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
- initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- initializeHexagonOptAddrModePass(*PassRegistry::getPassRegistry());
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeHexagonLoopIdiomRecognizePass(PR);
+ initializeHexagonGenMuxPass(PR);
+ initializeHexagonOptAddrModePass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index f2193013b7aa..68708dc4f50f 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -364,6 +364,18 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
+ if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) {
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ }
+
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -469,6 +481,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AssertZext);
setTargetDAGCombine(ISD::SHL);
@@ -918,14 +931,130 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
}
}
+static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
+ const MipsSubtarget &Subtarget) {
+ // ROOTNode must have a multiplication as an operand for the match to be
+ // successful.
+ if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL &&
+ ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // We don't handle vector types here.
+ if (ROOTNode->getValueType(0).isVector())
+ return SDValue();
+
+  // For MIPS64, madd / msub instructions are inefficient to use with 64 bit
+ // arithmetic. E.g.
+ // (add (mul a b) c) =>
+ // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in
+  //        MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32))
+ // or
+ // MIPS64R2: (dins (mflo res) (mfhi res) 32 32)
+ //
+ // The overhead of setting up the Hi/Lo registers and reassembling the
+  // result makes this a dubious optimization for MIPS64. The core of the
+ // problem is that Hi/Lo contain the upper and lower 32 bits of the
+ // operand and result.
+ //
+ // It requires a chain of 4 add/mul for MIPS64R2 to get better code
+ // density than doing it naively, 5 for MIPS64. Additionally, using
+ // madd/msub on MIPS64 requires the operands actually be 32 bit sign
+ // extended operands, not true 64 bit values.
+ //
+ // FIXME: For the moment, disable this completely for MIPS64.
+ if (Subtarget.hasMips64())
+ return SDValue();
+
+ SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
+ ? ROOTNode->getOperand(0)
+ : ROOTNode->getOperand(1);
+
+ SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
+ ? ROOTNode->getOperand(1)
+ : ROOTNode->getOperand(0);
+
+ // Transform this to a MADD only if the user of this node is the add.
+ // If there are other users of the mul, this function returns here.
+ if (!Mult.hasOneUse())
+ return SDValue();
+
+ // maddu and madd are unusual instructions in that on MIPS64 bits 63..31
+ // must be in canonical form, i.e. sign extended. For MIPS32, the operands
+ // of the multiply must have 32 or more sign bits, otherwise we cannot
+ // perform this optimization. We have to check this here as we're performing
+ // this optimization pre-legalization.
+ SDValue MultLHS = Mult->getOperand(0);
+ SDValue MultRHS = Mult->getOperand(1);
+ unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS);
+ unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS);
+
+ if (LHSSB < 32 || RHSSB < 32)
+ return SDValue();
+
+ APInt HighMask =
+ APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32);
+ bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) &&
+ CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) &&
+ CurDAG.MaskedValueIsZero(AddOperand, HighMask);
+
+ // Initialize accumulator.
+ SDLoc DL(ROOTNode);
+ SDValue TopHalf;
+ SDValue BottomHalf;
+ BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
+ CurDAG.getIntPtrConstant(0, DL));
+
+ TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
+ CurDAG.getIntPtrConstant(1, DL));
+ SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
+ BottomHalf,
+ TopHalf);
+
+ // Create MipsMAdd(u) / MipsMSub(u) node.
+ bool IsAdd = ROOTNode->getOpcode() == ISD::ADD;
+ unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd)
+ : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub);
+ SDValue MAddOps[3] = {
+ CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)),
+ CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn};
+ EVT VTs[2] = {MVT::i32, MVT::i32};
+ SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps);
+
+ SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
+ SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
+ SDValue Combined =
+ CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi);
+ return Combined;
+}
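The sign-bit precondition is the load-bearing part: madd/maddu consume 32-bit operands, so an i64 multiply may only be narrowed when each operand carries at least 32 sign bits, mirroring ComputeNumSignBits(...) >= 32. A standalone sketch of that count for a concrete i64:

#include <cassert>
#include <cstdint>

// Count leading bits equal to the sign bit (what ComputeNumSignBits proves
// conservatively from the DAG; here computed exactly for a known value).
static unsigned numSignBits64(int64_t V) {
  unsigned N = 1;
  while (N < 64 && ((V >> (63 - N)) & 1) == ((V >> 63) & 1))
    ++N;
  return N;
}

int main() {
  assert(numSignBits64(-1) == 64);           // all bits are sign copies
  assert(numSignBits64(0x7FFFFFFF) >= 32);   // fits in i32: eligible
  assert(numSignBits64(0x100000000LL) < 32); // needs full 64 bits: rejected
  return 0;
}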
+
+static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ // (sub v0 (mul v1, v2)) => (msub v1, v2, v0)
+ if (DCI.isBeforeLegalizeOps()) {
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
+ !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
+ return performMADD_MSUBCombine(N, DAG, Subtarget);
+
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
- // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+ // (add v0 (mul v1, v2)) => (madd v1, v2, v0)
+ if (DCI.isBeforeLegalizeOps()) {
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
+ !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
+ return performMADD_MSUBCombine(N, DAG, Subtarget);
- if (DCI.isBeforeLegalizeOps())
return SDValue();
+ }
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
SDValue Add = N->getOperand(1);
if (Add.getOpcode() != ISD::ADD)
@@ -1053,6 +1182,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performAssertZextCombine(N, DAG, DCI, Subtarget);
case ISD::SHL:
return performSHLCombine(N, DAG, DCI, Subtarget);
+ case ISD::SUB:
+ return performSUBCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index df62c66b75a3..4adf77f8d9a9 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -103,12 +103,9 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
for (unsigned i = 1; i < Cond.size(); ++i) {
- if (Cond[i].isReg())
- MIB.addReg(Cond[i].getReg());
- else if (Cond[i].isImm())
- MIB.addImm(Cond[i].getImm());
- else
- assert(false && "Cannot copy operand");
+ assert((Cond[i].isImm() || Cond[i].isReg()) &&
+ "Cannot copy operand for conditional branch!");
+ MIB.add(Cond[i]);
}
MIB.addMBB(TBB);
}
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index b95f1158fa56..272595af5f6f 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -274,8 +274,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
if (IsPIC) {
MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(FallThroughMBB, BalTgtMBB);
- LongBrMBB->addSuccessor(BalTgtMBB);
- BalTgtMBB->addSuccessor(TgtMBB);
+ LongBrMBB->addSuccessor(BalTgtMBB, BranchProbability::getOne());
+ BalTgtMBB->addSuccessor(&*FallThroughMBB, BranchProbability::getOne());
// We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal
// instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an
@@ -342,8 +342,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP).addImm(8);
if (Subtarget.hasMips32r6())
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR))
- .addReg(Mips::ZERO).addReg(Mips::AT);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR), Mips::ZERO)
+ .addReg(Mips::AT);
else
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
@@ -415,8 +415,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64).addImm(0);
if (Subtarget.hasMips64r6())
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64))
- .addReg(Mips::ZERO_64).addReg(Mips::AT_64);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64), Mips::ZERO_64)
+ .addReg(Mips::AT_64);
else
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 49ae6dd4cd39..4be26dd25dc0 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -245,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
}
}
-void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
- SDValue CmpLHS, const SDLoc &DL,
- SDNode *Node) const {
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
-
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
- if (Subtarget->isGP64bit()) {
- SLTuOp = Mips::SLTu64;
- ADDuOp = Mips::DADDu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = InFlag.getOpcode();
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
-
- if (Subtarget->isGP64bit()) {
- // On 64-bit targets, sltu produces an i64 but our backend currently says
- // that SLTu64 produces an i32. We need to fix this in the long run but for
- // now, just make the DAG type-correct by asserting the upper bits are zero.
- Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
- CurDAG->getTargetConstant(0, DL, VT),
- SDValue(Carry, 0),
- CurDAG->getTargetConstant(Mips::sub_32, DL,
- VT));
+ // In the base case, we can rely on the carry bit from the addsc
+ // instruction.
+ if (Opc == ISD::ADDC) {
+ SDValue Ops[3] = {LHS, RHS, InFlag};
+ CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
+ return;
}
- // Generate a second addition only if we know that RHS is not a
- // constant-zero node.
- SDNode *AddCarry = Carry;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
- if (!C || C->getZExtValue())
- AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
+ assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!");
+
+ // The more complex case is when there is a chain of ISD::ADDE nodes like:
+ // (adde (adde (adde (addc a b) c) d) e).
+ //
+ // The addwc instruction does not write to the carry bit, instead it writes
+ // to bit 20 of the dsp control register. To match this series of nodes, each
+ // intermediate adde node must be expanded to write the carry bit before the
+ // addition.
+
+ // Start by reading the overflow field for addsc and moving the value to the
+  // carry field. The usage of 1 here with Mips::RDDSP / Mips::WRDSP
+ // corresponds to reading/writing the entire control register to/from a GPR.
+
+ SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32);
+
+ SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
+
+ SDNode *DSPCtrlField =
+ CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
+
+ SDNode *Carry = CurDAG->getMachineNode(
+ Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
- CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
+ SDValue Ops[4] = {SDValue(DSPCtrlField, 0),
+ CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne,
+ SDValue(Carry, 0)};
+ SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops);
+
+  // My reading of the MIPS DSP 3.01 specification isn't as clear as I
+ // would like about whether bit 20 always gets overwritten by addwc.
+ // Hence take an extremely conservative view and presume it's sticky. We
+ // therefore need to clear it.
+
+ SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
+ SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
+
+ SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
+ SDValue(DSPCtrlFinal, 0), CstOne);
+
+ SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)};
+ CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands);
}
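The RDDSP/EXT/INS/WRDSP sequence is ordinary bit-field surgery on the control word: read it, pull out the overflow bit, re-insert it at the carry position, and conservatively zero the original field. A model of that dance, with bit positions 20 (overflow) and 13 (carry) taken as illustrative assumptions:

#include <cassert>
#include <cstdint>

// EXT: read a bit-field; INS: overwrite a bit-field in place.
static uint32_t ext(uint32_t W, unsigned Pos, unsigned Size) {
  return (W >> Pos) & ((1u << Size) - 1);
}
static uint32_t ins(uint32_t W, uint32_t Field, unsigned Pos, unsigned Size) {
  uint32_t Mask = ((1u << Size) - 1) << Pos;
  return (W & ~Mask) | ((Field << Pos) & Mask);
}

int main() {
  uint32_t Ctrl = 1u << 20;      // overflow (ouflag) set by addsc
  uint32_t C = ext(Ctrl, 20, 1); // read the overflow bit
  Ctrl = ins(Ctrl, C, 13, 1);    // move it into the carry field
  Ctrl = ins(Ctrl, 0, 20, 1);    // clear ouflag in case it is sticky
  assert(Ctrl == (1u << 13));
  return 0;
}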
/// Match frameindex
@@ -765,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
- case ISD::SUBE: {
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
- selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
- return true;
- }
-
case ISD::ADDE: {
- if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
- break;
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
- selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
+ selectAddE(Node, DL);
return true;
}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index f89a350cab04..6f38289c5a45 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -41,8 +41,7 @@ private:
const SDLoc &dl, EVT Ty, bool HasLo,
bool HasHi);
- void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
- const SDLoc &DL, SDNode *Node) const;
+ void selectAddE(SDNode *Node, const SDLoc &DL) const;
bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index bf7f079e3105..2382ea271661 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -179,8 +179,6 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
- setTargetDAGCombine(ISD::ADDE);
- setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::MUL);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -421,163 +419,6 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
return MipsTargetLowering::LowerOperation(Op, DAG);
}
-// selectMADD -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc multLo, Lo0), (adde multHi, Hi0),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
- // ADDENode's second operand must be a flag output of an ADDC node in order
- // for the matching to be successful.
- SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
-
- if (ADDCNode->getOpcode() != ISD::ADDC)
- return false;
-
- SDValue MultHi = ADDENode->getOperand(0);
- SDValue MultLo = ADDCNode->getOperand(0);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo amd MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MADD only if ADDENode and ADDCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than ADDENode or ADDCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MADD instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDLoc DL(ADDENode);
-
- // Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- ADDCNode->getOperand(1),
- ADDENode->getOperand(1));
-
- // create MipsMAdd(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
-
- SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ACCIn);
-
- // replace uses of adde and addc here
- if (!SDValue(ADDCNode, 0).use_empty()) {
- SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
- }
- if (!SDValue(ADDENode, 0).use_empty()) {
- SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
- }
-
- return true;
-}
-
-// selectMSUB -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc Lo0, multLo), (sube Hi0, multHi),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
- // SUBENode's second operand must be a flag output of an SUBC node in order
- // for the matching to be successful.
- SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
-
- if (SUBCNode->getOpcode() != ISD::SUBC)
- return false;
-
- SDValue MultHi = SUBENode->getOperand(1);
- SDValue MultLo = SUBCNode->getOperand(1);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo amd MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than SUBENode or SUBCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MSUB instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDLoc DL(SUBENode);
-
- // Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- SUBCNode->getOperand(0),
- SUBENode->getOperand(0));
-
- // create MipsSub(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
-
- SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ACCIn);
-
- // replace uses of sube and subc here
- if (!SDValue(SUBCNode, 0).use_empty()) {
- SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
- }
- if (!SDValue(SUBENode, 0).use_empty()) {
- SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
- }
-
- return true;
-}
-
-static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
- N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
@@ -820,19 +661,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 &&
- selectMSUB(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Clear the upper (64 - VT.sizeInBits) bits.
@@ -1110,16 +938,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Val;
switch (N->getOpcode()) {
- case ISD::ADDE:
- return performADDECombine(N, DAG, DCI, Subtarget);
case ISD::AND:
Val = performANDCombine(N, DAG, DCI, Subtarget);
break;
case ISD::OR:
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
- case ISD::SUBE:
- return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMULCombine(N, DAG, DCI, this);
case ISD::SHL:
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 625a652a0ca0..ccd47f00c0d3 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -78,7 +78,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// IsNan2008 - IEEE 754-2008 NaN encoding.
bool IsNaN2008bit;
- // IsFP64bit - General-purpose registers are 64 bits wide
+ // IsGP64bit - General-purpose registers are 64 bits wide
bool IsGP64bit;
// IsPTR64bit - Pointers are 64 bit wide
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 28d496ee9ca1..afd2e87078a9 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2907,19 +2907,6 @@ SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS,
getI64Imm(58, dl), getI64Imm(63, dl)),
0);
}
- case ISD::SETNE: {
- // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
- // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
- // {addcz.reg, addcz.CA} = (addcarry %a, -1)
- // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
- SDValue Xor = IsRHSZero ? LHS :
- SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
- SDValue AC =
- SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
- Xor, getI32Imm(~0U, dl)), 0);
- return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
- Xor, AC.getValue(1)), 0);
- }
}
}
@@ -2944,19 +2931,6 @@ SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS,
return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
Addic, Addic.getValue(1)), 0);
}
- case ISD::SETNE: {
- // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
- // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
- // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
- // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
- SDValue Xor = IsRHSZero ? LHS :
- SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
- SDValue SC =
- SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
- Xor, getI32Imm(0, dl)), 0);
- return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
- SC, SC.getValue(1)), 0);
- }
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bda4e5e81734..662550f7a396 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -136,7 +136,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}
- // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
@@ -175,7 +175,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
- // PowerPC does not support direct load / store of condition registers
+ // PowerPC does not support direct load/store of condition registers.
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -204,11 +204,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
- // PowerPC has no SREM/UREM instructions
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+  // PowerPC has no SREM/UREM instructions unless we are on P9. On P9 we may
+  // use a hardware instruction to compute the remainder. The instructions
+  // are not legalized directly because when the results of both the
+  // remainder and the division are required, it is more efficient to compute
+  // the remainder from the result of the division than to use the remainder
+  // instruction.
+ if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::SREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Custom);
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+ } else {
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ }
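
The trade-off described above rests on the srem/urem identity. As a minimal
sketch in plain C++ (illustrative names, not part of the patch): when the
quotient of the same operands is already available, the remainder costs one
multiply and one subtract instead of a second divide.

  // srem identity: a % b == a - (a / b) * b
  long long remainderFromQuotient(long long a, long long b, long long q) {
    return a - q * b; // one mul + one sub, no second division
  }
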
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
@@ -1116,6 +1128,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
+ case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
@@ -1598,22 +1611,34 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-// Check that the mask is shuffling N byte elements.
-static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
+/// Check that the mask is shuffling N byte elements. Within each N byte
+/// element of the mask, the indices can be in either increasing or
+/// decreasing order as long as they are consecutive.
+/// \param[in] N the shuffle vector SD Node to analyze
+/// \param[in] Width the element width in bytes, which can be 2/4/8/16
+///            (HalfWord/Word/DoubleWord/QuadWord)
+/// \param[in] StepLen the step between consecutive indices within each N byte
+///            element: 1 for increasing order, -1 for decreasing order
+/// \return true iff the mask is shuffling N byte elements.
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
+ int StepLen) {
assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.");
+  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
unsigned NumOfElem = 16 / Width;
unsigned MaskVal[16]; // Width is never greater than 16
for (unsigned i = 0; i < NumOfElem; ++i) {
MaskVal[0] = N->getMaskElt(i * Width);
- if (MaskVal[0] % Width) {
+ if ((StepLen == 1) && (MaskVal[0] % Width)) {
+ return false;
+ } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
return false;
}
for (unsigned int j = 1; j < Width; ++j) {
MaskVal[j] = N->getMaskElt(i * Width + j);
- if (MaskVal[j] != MaskVal[j-1] + 1) {
+ if (MaskVal[j] != MaskVal[j-1] + StepLen) {
return false;
}
}
@@ -1624,7 +1649,7 @@ static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
- if (!isNByteElemShuffleMask(N, 4))
+ if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12
@@ -1701,7 +1726,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
- if (!isNByteElemShuffleMask(N, 4))
+ if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
@@ -1759,6 +1784,35 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
}
}
+static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+
+ if (!isNByteElemShuffleMask(N, Width, -1))
+ return false;
+
+ for (int i = 0; i < 16; i += Width)
+ if (N->getMaskElt(i) != i + Width - 1)
+ return false;
+
+ return true;
+}
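
For Width == 4 (the XXBRW case) the checks above accept exactly the mask
{3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}. A self-contained sketch of the
same predicate in plain C++ (hypothetical name, independent of SelectionDAG):

  // Accept a 16-entry byte shuffle mask that reverses the bytes within
  // each Width-byte element.
  static bool isByteReverseMask(const int Mask[16], int Width) {
    for (int i = 0; i < 16; i += Width)
      for (int j = 0; j < Width; ++j)
        if (Mask[i + j] != i + Width - 1 - j)
          return false;
    return true;
  }
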
+
+bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 2);
+}
+
+bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 4);
+}
+
+bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 8);
+}
+
+bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 16);
+}
+
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
@@ -1772,7 +1826,7 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the double word is consecutive.
- if (!isNByteElemShuffleMask(N, 8))
+ if (!isNByteElemShuffleMask(N, 8, 1))
return false;
unsigned M0 = N->getMaskElt(0) / 8;
@@ -6819,6 +6873,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
+// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
SDValue NewResChain,
SelectionDAG &DAG) const {
@@ -7846,6 +7901,26 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
}
+ if (Subtarget.hasP9Vector()) {
+ if (PPC::isXXBRHShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
+ } else if (PPC::isXXBRWShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
+ } else if (PPC::isXXBRDShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
+ SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
+ } else if (PPC::isXXBRQShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
+ SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
+ }
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
@@ -8393,6 +8468,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue();
}
+SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
+ // Check for a DIV with the same operands as this REM.
+ for (auto UI : Op.getOperand(1)->uses()) {
+ if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
+ (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
+ if (UI->getOperand(0) == Op.getOperand(0) &&
+ UI->getOperand(1) == Op.getOperand(1))
+ return SDValue();
+ }
+ return Op;
+}
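
Source-level intuition for the check above (a hedged sketch, illustrative
functions only): when a matching division exists, returning a null SDValue
lets the default expansion reuse it; otherwise the node is kept and the MOD*
patterns added later in this patch select a single hardware instruction on P9.

  // Both results needed: the remainder is expanded as a - (a / b) * b,
  // reusing the division that is computed anyway.
  int f1(int a, int b, int *q) { *q = a / b; return a % b; }
  // Only the remainder needed: a single modsw is selected on P9.
  int f2(int a, int b) { return a % b; }
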
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8861,6 +8948,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::SREM:
+ case ISD::UREM:
+ return LowerREM(Op, DAG);
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7982a4a9e9fb..a5108727bb4b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -86,6 +86,10 @@ namespace llvm {
///
XXINSERT,
+ /// XXREVERSE - The PPC VSX reverse instruction
+ ///
+ XXREVERSE,
+
/// VECSHL - The PPC VSX shift left instruction
///
VECSHL,
@@ -458,6 +462,23 @@ namespace llvm {
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
+
+ /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRH instruction.
+ bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRW instruction.
+ bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRD instruction.
+ bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRQ instruction.
+ bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);
+
/// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXPERMDI instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
@@ -918,6 +939,7 @@ namespace llvm {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 295590b2acf6..70536a6039b8 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -683,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divde $rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
isPPC64, Requires<[HasExtDiv]>;
+
+let Predicates = [IsISA3_0] in {
+def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "modsd $rT, $rA, $rB", IIC_IntDivW,
+ [(set i64:$rT, (srem i64:$rA, i64:$rB))]>;
+def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "modud $rT, $rA, $rB", IIC_IntDivW,
+ [(set i64:$rT, (urem i64:$rA, i64:$rB))]>;
+}
+
let Defs = [CR0] in
def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divde. $rT, $rA, $rB", IIC_IntDivD,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index f3c68c443b1b..236e513bec23 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1964,7 +1964,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
- BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val);
+ BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
.addImm(PPC::PRED_NE_MINUS)
.addReg(PPC::CR7)
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 8223aa655e38..47d59c25392a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
+def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>,
+ SDTCisVec<1>
+]>;
+
def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
@@ -174,6 +178,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
+def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@@ -2544,6 +2549,14 @@ let Uses = [RM] in {
"mffs. $rT", IIC_IntMFFS, []>, isDOT;
}
+let Predicates = [IsISA3_0] in {
+def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "modsw $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (srem i32:$rA, i32:$rB))]>;
+def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "moduw $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (urem i32:$rA, i32:$rB))]>;
+}
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index e214d26c063b..9cfc897cdb3f 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2340,6 +2340,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>;
def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
+ // Vector Reverse
+  def : Pat<(v8i16 (PPCxxreverse v8i16:$A)),
+            (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+  def : Pat<(v4i32 (PPCxxreverse v4i32:$A)),
+            (v4i32 (XXBRW $A))>;
+  def : Pat<(v2i64 (PPCxxreverse v2i64:$A)),
+            (v2i64 (XXBRD $A))>;
+  def : Pat<(v1i128 (PPCxxreverse v1i128:$A)),
+            (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+
// Vector Permute
def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
IIC_VecPerm, []>;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index aad913924692..637e52bbdbee 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -273,6 +273,20 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+  if (TM.isELFv2ABI() && PhysReg == PPC::X2) {
+    // X2 is guaranteed to be preserved within a function if it is reserved.
+    // The reason it's reserved is that it's the TOC pointer (and the function
+    // uses the TOC). In functions where it isn't reserved (i.e. leaf functions
+    // with no TOC access), we can't claim that it is preserved.
+    return getReservedRegs(MF).test(PPC::X2);
+  }
+  return false;
+}
+
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const PPCFrameLowering *TFI = getFrameLowering(MF);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 4a96327fe552..0bbb71fdf9fb 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -83,6 +83,7 @@ public:
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+  bool isCallerPreservedPhysReg(unsigned PhysReg,
+                                const MachineFunction &MF) const override;
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5559cdc5fe46..3dbd5f5b9a92 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -230,7 +230,7 @@ unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
return ST->hasVSX() ? 64 : 32;
}
-unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 2e0116fee04c..758c335def08 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -63,7 +63,7 @@ public:
bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
unsigned getMaxInterleaveFactor(unsigned VF);
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 6a3dc6799c43..422c16b8eb62 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -302,7 +302,7 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
return 0;
}
-unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
if (!Vector)
return 64;
if (ST->hasVector())
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index ad597f5c65f0..bdba7601eb78 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -53,7 +53,7 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
bool prefersVectorizedAddressing() { return false; }
bool supportsEfficientVectorElementLoadStore() { return true; }
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index ddf964e7dbb7..5ad147e5e596 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -46,9 +46,7 @@ public:
/// .functype
virtual void emitIndirectFunctionType(StringRef name,
SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) {
- llvm_unreachable("emitIndirectFunctionType not implemented");
- }
+ SmallVectorImpl<MVT> &Results) = 0;
/// .indidx
virtual void emitIndIdx(const MCExpr *Value) = 0;
/// .import_global
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 27c01cb8acf7..19e14f3261aa 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -16,11 +16,15 @@
#include "MCTargetDesc/WebAssemblyFixupKinds.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MCWasmObjectWriter.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+
using namespace llvm;
namespace {
@@ -29,8 +33,8 @@ public:
explicit WebAssemblyWasmObjectWriter(bool Is64Bit);
private:
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const override;
+ unsigned getRelocType(const MCValue &Target,
+ const MCFixup &Fixup) const override;
};
} // end anonymous namespace
@@ -39,16 +43,13 @@ WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit)
// Test whether the given expression computes a function address.
static bool IsFunctionExpr(const MCExpr *Expr) {
- if (const MCSymbolRefExpr *SyExp =
- dyn_cast<MCSymbolRefExpr>(Expr))
+  if (auto *SyExp = dyn_cast<MCSymbolRefExpr>(Expr))
return cast<MCSymbolWasm>(SyExp->getSymbol()).isFunction();
- if (const MCBinaryExpr *BinOp =
- dyn_cast<MCBinaryExpr>(Expr))
+  if (auto *BinOp = dyn_cast<MCBinaryExpr>(Expr))
return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS());
- if (const MCUnaryExpr *UnOp =
- dyn_cast<MCUnaryExpr>(Expr))
+  if (auto *UnOp = dyn_cast<MCUnaryExpr>(Expr))
return IsFunctionExpr(UnOp->getSubExpr());
return false;
@@ -59,15 +60,13 @@ static bool IsFunctionType(const MCValue &Target) {
return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX;
}
-unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
+unsigned
+WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup) const {
// WebAssembly functions are not allocated in the data address space. To
// resolve a pointer to a function, we must use a special relocation type.
bool IsFunction = IsFunctionExpr(Fixup.getValue());
- assert(!IsPCRel);
switch (unsigned(Fixup.getKind())) {
case WebAssembly::fixup_code_sleb128_i32:
if (IsFunction)
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 4178ec0b28f0..b999091e2d29 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -33,6 +33,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -218,9 +220,13 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- if (GV->getValueType()->isFunctionTy())
+ if (GV->getValueType()->isFunctionTy()) {
+ MCSymbol* Sym = getSymbol(GV);
+ if (!isa<MCSymbolELF>(Sym))
+ cast<MCSymbolWasm>(Sym)->setIsFunction(true);
return MCSymbolRefExpr::create(
- getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
+ Sym, MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
+ }
return AsmPrinter::lowerConstant(CV);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 47aadf99e860..b3ce4bd27460 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -36,7 +36,7 @@ unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) {
return Result;
}
-unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector && getST()->hasSIMD128())
return 128;
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index f658609f8930..7b35fc916133 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -55,7 +55,7 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 831e9bdab0e1..172eba0002d4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,4 +1,3 @@
-
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
@@ -5314,20 +5313,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits;
- unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
- unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+ // Bitcast a source array of element bits to the target size.
+ auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
+ unsigned NumSrcElts = UndefSrcElts.getBitWidth();
+ unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
+ assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
+ "Constant bit sizes don't match");
- // Extract all the undef/constant element data and pack into single bitsets.
- APInt UndefBits(SizeInBits, 0);
- APInt MaskBits(SizeInBits, 0);
-
- // Split the undef/constant single bitset data into the target elements.
- auto SplitBitData = [&]() {
// Don't split if we don't allow undef bits.
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
- if (UndefBits.getBoolValue() && !AllowUndefs)
+ if (UndefSrcElts.getBoolValue() && !AllowUndefs)
return false;
+ // If we're already the right size, don't bother bitcasting.
+ if (NumSrcElts == NumElts) {
+ UndefElts = UndefSrcElts;
+ EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
+ return true;
+ }
+
+ // Extract all the undef/constant element data and pack into single bitsets.
+ APInt UndefBits(SizeInBits, 0);
+ APInt MaskBits(SizeInBits, 0);
+
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ unsigned BitOffset = i * SrcEltSizeInBits;
+ if (UndefSrcElts[i])
+ UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
+ MaskBits.insertBits(SrcEltBits[i], BitOffset);
+ }
+
+ // Split the undef/constant single bitset data into the target elements.
UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
@@ -5356,20 +5372,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Collect constant bits and insert into mask/undef bit masks.
auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
- unsigned BitOffset) {
+ unsigned UndefBitIndex) {
if (!Cst)
return false;
if (isa<UndefValue>(Cst)) {
- unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
- Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
+ Undefs.setBit(UndefBitIndex);
return true;
}
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
- Mask.insertBits(CInt->getValue(), BitOffset);
+ Mask = CInt->getValue();
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
- Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
+ Mask = CFP->getValueAPF().bitcastToAPInt();
return true;
}
return false;
@@ -5377,18 +5392,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Extract constant bits from build vector.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
const SDValue &Src = Op.getOperand(i);
- unsigned BitOffset = i * SrcEltSizeInBits;
if (Src.isUndef()) {
- UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
+ UndefSrcElts.setBit(i);
continue;
}
auto *Cst = cast<ConstantSDNode>(Src);
- APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
- MaskBits.insertBits(Bits, BitOffset);
+ SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
}
- return SplitBitData();
+ return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from constant pool vector.
@@ -5397,27 +5415,33 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
return false;
- unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
- for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i)
- if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
- i * CstEltSizeInBits))
+ unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
+ unsigned NumSrcElts = CstTy->getVectorNumElements();
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
+ for (unsigned i = 0; i != NumSrcElts; ++i)
+ if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
+ UndefSrcElts, i))
return false;
- return SplitBitData();
+ return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST &&
- EltSizeInBits <= SrcEltSizeInBits) {
+ EltSizeInBits <= VT.getScalarSizeInBits()) {
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
- APInt Bits(SizeInBits, 0);
- APInt Undefs(SizeInBits, 0);
- if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
- for (unsigned i = 0; i != NumSrcElts; ++i) {
- MaskBits |= Bits.shl(i * SrcEltSizeInBits);
- UndefBits |= Undefs.shl(i * SrcEltSizeInBits);
- }
- return SplitBitData();
+ unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
+ if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
+ if (UndefSrcElts[0])
+ UndefSrcElts.setBits(0, NumSrcElts);
+ SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
+ return CastBitData(UndefSrcElts, SrcEltBits);
}
}
}
@@ -5426,10 +5450,15 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits;
auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
- MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
- MaskBits = MaskBits.zext(SizeInBits);
- return SplitBitData();
+ SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
+ SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
+ return CastBitData(UndefSrcElts, SrcEltBits);
}
return false;
@@ -6491,16 +6520,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
-
- if (LDBase->hasAnyUseOfValue(1)) {
- SDValue NewChain =
- DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
- SDValue(NewLd.getNode(), 1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
- DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
- SDValue(NewLd.getNode(), 1));
- }
-
+ DAG.makeEquivalentMemoryOrdering(LDBase, NewLd);
return NewLd;
};
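
This hunk and the two that follow fold hand-built TokenFactor plumbing into a
single helper. A sketch of what that helper is assumed to do (hypothetical
name, mirroring the deleted lines): give the new memory operation the same
position in the chain as the old load and redirect the old load's chain users.

  void makeEquivalentOrderingSketch(SelectionDAG &DAG, const SDLoc &DL,
                                    SDNode *OldLoad, SDValue NewMemOp) {
    if (!OldLoad->hasAnyUseOfValue(1))
      return; // no chain users to redirect
    SDValue NewChain =
        DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(OldLoad, 1),
                    NewMemOp.getValue(1));
    DAG.ReplaceAllUsesOfValueWith(SDValue(OldLoad, 1), NewChain);
    // The RAUW above also rewrote NewChain's own operand; restore it.
    DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(OldLoad, 1),
                           NewMemOp.getValue(1));
  }
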
@@ -6565,19 +6585,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
-
- // Make sure the newly-created LOAD is in the same position as LDBase in
- // terms of dependency. We create a TokenFactor for LDBase and ResNode,
- // and update uses of LDBase's output chain to use the TokenFactor.
- if (LDBase->hasAnyUseOfValue(1)) {
- SDValue NewChain =
- DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
- SDValue(ResNode.getNode(), 1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
- DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
- SDValue(ResNode.getNode(), 1));
- }
-
+ DAG.makeEquivalentMemoryOrdering(LDBase, ResNode);
return DAG.getBitcast(VT, ResNode);
}
}
@@ -9930,17 +9938,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
-
- // Make sure the newly-created LOAD is in the same position as Ld in
- // terms of dependency. We create a TokenFactor for Ld and V,
- // and update uses of Ld's output chain to use the TokenFactor.
- if (Ld->hasAnyUseOfValue(1)) {
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- SDValue(Ld, 1), SDValue(V.getNode(), 1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
- DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
- SDValue(V.getNode(), 1));
- }
+ DAG.makeEquivalentMemoryOrdering(Ld, V);
} else if (!BroadcastFromReg) {
// We can't broadcast from a vector register.
return SDValue();
@@ -10891,9 +10889,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
"We need to be changing the number of flipped inputs!");
int PSHUFHalfMask[] = {0, 1, 2, 3};
std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
- V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
- MVT::v8i16, V,
- getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
+ V = DAG.getNode(
+ FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
+ MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V,
+ getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
for (int &M : Mask)
if (M >= 0 && M == FixIdx)
@@ -12007,18 +12006,22 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
- // With AVX2 we should use VPERMQ/VPERMPD to allow memory folding.
+ // With AVX2, use VPERMQ/VPERMPD to allow memory folding.
if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
- MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0, DL));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
- OnlyUsesV1 ? V1 : V2,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
+ // this will likely become vinsertf128 which can't fold a 256-bit memop.
+ if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ OnlyUsesV1 ? V1 : V2,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
}
}
@@ -19117,7 +19120,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
if (Op.getOpcode() == X86ISD::FSETCCM ||
- Op.getOpcode() == X86ISD::FSETCCM_RND)
+ Op.getOpcode() == X86ISD::FSETCCM_RND)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
@@ -27968,28 +27971,45 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
OpMask.size() % RootMask.size() == 0) ||
OpMask.size() == RootMask.size()) &&
"The smaller number of elements must divide the larger.");
- int MaskWidth = std::max<int>(OpMask.size(), RootMask.size());
- int RootRatio = std::max<int>(1, OpMask.size() / RootMask.size());
- int OpRatio = std::max<int>(1, RootMask.size() / OpMask.size());
- assert(((RootRatio == 1 && OpRatio == 1) ||
- (RootRatio == 1) != (OpRatio == 1)) &&
+
+ // This function can be performance-critical, so we rely on the power-of-2
+ // knowledge that we have about the mask sizes to replace div/rem ops with
+ // bit-masks and shifts.
+ assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes");
+ assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
+ unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
+ unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
+
+ unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
+ unsigned RootRatio = std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
+ unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
+ assert((RootRatio == 1 || OpRatio == 1) &&
"Must not have a ratio for both incoming and op masks!");
- SmallVector<int, 64> Mask((unsigned)MaskWidth, SM_SentinelUndef);
+ assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
+ assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
+ assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
+ unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
+ unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
+
+ SmallVector<int, 64> Mask(MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by the
// root mask to get us all the way to the root value arrangement. The reason
// for this order is that we are recursing up the operation chain.
- for (int i = 0; i < MaskWidth; ++i) {
- int RootIdx = i / RootRatio;
+ for (unsigned i = 0; i < MaskWidth; ++i) {
+ unsigned RootIdx = i >> RootRatioLog2;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane, we're done.
Mask[i] = RootMask[RootIdx];
continue;
}
- int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
+ unsigned RootMaskedIdx =
+ RootRatio == 1
+ ? RootMask[RootIdx]
+ : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
// Just insert the scaled root mask value if it references an input other
// than the SrcOp we're currently inserting.
@@ -27999,9 +28019,8 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
continue;
}
- RootMaskedIdx %= MaskWidth;
-
- int OpIdx = RootMaskedIdx / OpRatio;
+ RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
+ unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef, it doesn't matter which ones we
// are using.
@@ -28010,9 +28029,12 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
}
// Ok, we have non-zero lanes, map them through to one of the Op's inputs.
- int OpMaskedIdx = OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio;
- OpMaskedIdx %= MaskWidth;
+ unsigned OpMaskedIdx =
+ OpRatio == 1
+ ? OpMask[OpIdx]
+ : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
+ OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
if (OpMask[OpIdx] < (int)OpMask.size()) {
assert(0 <= InputIdx0 && "Unknown target shuffle input");
OpMaskedIdx += InputIdx0 * MaskWidth;
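
The rewrite above leans on two identities that hold only when N is a power of
two; a standalone restatement:

  // With N a power of two and Log2N = countTrailingZeros(N):
  unsigned divPow2(unsigned i, unsigned Log2N) { return i >> Log2N; } // i / N
  unsigned remPow2(unsigned i, unsigned N) { return i & (N - 1); }    // i % N
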
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index d8702693884d..2620679df251 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1631,6 +1631,7 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
@@ -1764,6 +1765,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
AVX512ICC:$cc),
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5224a16613cb..c28b35b22977 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -737,19 +737,15 @@ def alignedloadv8f64 : PatFrag<(ops node:$ptr),
def alignedloadv8i64 : PatFrag<(ops node:$ptr),
(v8i64 (alignedload512 node:$ptr))>;
-// Like 'load', but uses special alignment checks suitable for use in
+// Like 'vec128load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
-// Avoid non-temporal aligned loads on supported targets.
-def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return (Subtarget->hasSSEUnalignedMem() ||
- cast<LoadSDNode>(N)->getAlignment() >= 16) &&
- (!Subtarget->hasSSE41() ||
- !(cast<LoadSDNode>(N)->getAlignment() >= 16 &&
- cast<LoadSDNode>(N)->isNonTemporal()));
+def memop : PatFrag<(ops node:$ptr), (vec128load node:$ptr), [{
+ return Subtarget->hasSSEUnalignedMem() ||
+ cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
// 128-bit memop pattern fragments
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ff5d90c4e78b..f3094b781c49 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -898,10 +898,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
{ X86::VPABSWZrr, X86::VPABSWZrm, 0 },
+ { X86::VPCONFLICTDZrr, X86::VPCONFLICTDZrm, 0 },
+ { X86::VPCONFLICTQZrr, X86::VPCONFLICTQZrm, 0 },
{ X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
{ X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
{ X86::VPERMQZri, X86::VPERMQZmi, 0 },
+ { X86::VPLZCNTDZrr, X86::VPLZCNTDZrm, 0 },
+ { X86::VPLZCNTQZrr, X86::VPLZCNTQZrm, 0 },
{ X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
{ X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE },
{ X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
@@ -948,10 +952,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
{ X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
{ X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
+ { X86::VPCONFLICTDZ256rr, X86::VPCONFLICTDZ256rm, 0 },
+ { X86::VPCONFLICTQZ256rr, X86::VPCONFLICTQZ256rm, 0 },
{ X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
{ X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
{ X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
{ X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
+ { X86::VPLZCNTDZ256rr, X86::VPLZCNTDZ256rm, 0 },
+ { X86::VPLZCNTQZ256rr, X86::VPLZCNTQZ256rm, 0 },
{ X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
{ X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE },
{ X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
@@ -995,8 +1003,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
{ X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
{ X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
+ { X86::VPCONFLICTDZ128rr, X86::VPCONFLICTDZ128rm, 0 },
+ { X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 },
{ X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
{ X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
+ { X86::VPLZCNTDZ128rr, X86::VPLZCNTDZ128rm, 0 },
+ { X86::VPLZCNTQZ128rr, X86::VPLZCNTQZ128rm, 0 },
{ X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
{ X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE },
{ X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE },
@@ -2312,10 +2324,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
{ X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
{ X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
+ { X86::VPCONFLICTDZrrkz, X86::VPCONFLICTDZrmkz, 0 },
+ { X86::VPCONFLICTQZrrkz, X86::VPCONFLICTQZrmkz, 0 },
{ X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
{ X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
{ X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
{ X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
+ { X86::VPLZCNTDZrrkz, X86::VPLZCNTDZrmkz, 0 },
+ { X86::VPLZCNTQZrrkz, X86::VPLZCNTQZrmkz, 0 },
{ X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
{ X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE },
{ X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
@@ -2350,10 +2366,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
{ X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
{ X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
+ { X86::VPCONFLICTDZ256rrkz, X86::VPCONFLICTDZ256rmkz, 0 },
+ { X86::VPCONFLICTQZ256rrkz, X86::VPCONFLICTQZ256rmkz, 0 },
{ X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
{ X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
{ X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
{ X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
+ { X86::VPLZCNTDZ256rrkz, X86::VPLZCNTDZ256rmkz, 0 },
+ { X86::VPLZCNTQZ256rrkz, X86::VPLZCNTQZ256rmkz, 0 },
{ X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
{ X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE },
{ X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
@@ -2385,8 +2405,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
{ X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
{ X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
+ { X86::VPCONFLICTDZ128rrkz, X86::VPCONFLICTDZ128rmkz, 0 },
+ { X86::VPCONFLICTQZ128rrkz, X86::VPCONFLICTQZ128rmkz, 0 },
{ X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
{ X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
+ { X86::VPLZCNTDZ128rrkz, X86::VPLZCNTDZ128rmkz, 0 },
+ { X86::VPLZCNTQZ128rrkz, X86::VPLZCNTQZ128rmkz, 0 },
{ X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
{ X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE },
{ X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE },
@@ -2935,10 +2959,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
{ X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
{ X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
+ { X86::VPCONFLICTDZrrk, X86::VPCONFLICTDZrmk, 0 },
+ { X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 },
{ X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
{ X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
{ X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
{ X86::VPERMQZrik, X86::VPERMQZmik, 0 },
+ { X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmk, 0 },
+ { X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 },
{ X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
{ X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE },
{ X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
@@ -2973,10 +3001,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
{ X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
{ X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
+ { X86::VPCONFLICTDZ256rrk, X86::VPCONFLICTDZ256rmk, 0 },
+ { X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 },
{ X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
{ X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
{ X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
{ X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
+ { X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmk, 0 },
+ { X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 },
{ X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
{ X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE },
{ X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
@@ -3008,8 +3040,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
{ X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
{ X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
+ { X86::VPCONFLICTDZ128rrk, X86::VPCONFLICTDZ128rmk, 0 },
+ { X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 },
{ X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
{ X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
+ { X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmk, 0 },
+ { X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 },
{ X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
{ X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE },
{ X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE },
@@ -3034,6 +3070,64 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
{ X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
{ X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
+
+ // AVX-512 masked compare instructions
+ { X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
+ { X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 },
+ { X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 },
+ { X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 },
+ { X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 },
+ { X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 },
+ { X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 },
+ { X86::VPCMPBZ256rrik, X86::VPCMPBZ256rmik, 0 },
+ { X86::VPCMPBZrrik, X86::VPCMPBZrmik, 0 },
+ { X86::VPCMPDZ128rrik, X86::VPCMPDZ128rmik, 0 },
+ { X86::VPCMPDZ256rrik, X86::VPCMPDZ256rmik, 0 },
+ { X86::VPCMPDZrrik, X86::VPCMPDZrmik, 0 },
+ { X86::VPCMPEQBZ128rrk, X86::VPCMPEQBZ128rmk, 0 },
+ { X86::VPCMPEQBZ256rrk, X86::VPCMPEQBZ256rmk, 0 },
+ { X86::VPCMPEQBZrrk, X86::VPCMPEQBZrmk, 0 },
+ { X86::VPCMPEQDZ128rrk, X86::VPCMPEQDZ128rmk, 0 },
+ { X86::VPCMPEQDZ256rrk, X86::VPCMPEQDZ256rmk, 0 },
+ { X86::VPCMPEQDZrrk, X86::VPCMPEQDZrmk, 0 },
+ { X86::VPCMPEQQZ128rrk, X86::VPCMPEQQZ128rmk, 0 },
+ { X86::VPCMPEQQZ256rrk, X86::VPCMPEQQZ256rmk, 0 },
+ { X86::VPCMPEQQZrrk, X86::VPCMPEQQZrmk, 0 },
+ { X86::VPCMPEQWZ128rrk, X86::VPCMPEQWZ128rmk, 0 },
+ { X86::VPCMPEQWZ256rrk, X86::VPCMPEQWZ256rmk, 0 },
+ { X86::VPCMPEQWZrrk, X86::VPCMPEQWZrmk, 0 },
+ { X86::VPCMPGTBZ128rrk, X86::VPCMPGTBZ128rmk, 0 },
+ { X86::VPCMPGTBZ256rrk, X86::VPCMPGTBZ256rmk, 0 },
+ { X86::VPCMPGTBZrrk, X86::VPCMPGTBZrmk, 0 },
+ { X86::VPCMPGTDZ128rrk, X86::VPCMPGTDZ128rmk, 0 },
+ { X86::VPCMPGTDZ256rrk, X86::VPCMPGTDZ256rmk, 0 },
+ { X86::VPCMPGTDZrrk, X86::VPCMPGTDZrmk, 0 },
+ { X86::VPCMPGTQZ128rrk, X86::VPCMPGTQZ128rmk, 0 },
+ { X86::VPCMPGTQZ256rrk, X86::VPCMPGTQZ256rmk, 0 },
+ { X86::VPCMPGTQZrrk, X86::VPCMPGTQZrmk, 0 },
+ { X86::VPCMPGTWZ128rrk, X86::VPCMPGTWZ128rmk, 0 },
+ { X86::VPCMPGTWZ256rrk, X86::VPCMPGTWZ256rmk, 0 },
+ { X86::VPCMPGTWZrrk, X86::VPCMPGTWZrmk, 0 },
+ { X86::VPCMPQZ128rrik, X86::VPCMPQZ128rmik, 0 },
+ { X86::VPCMPQZ256rrik, X86::VPCMPQZ256rmik, 0 },
+ { X86::VPCMPQZrrik, X86::VPCMPQZrmik, 0 },
+ { X86::VPCMPUBZ128rrik, X86::VPCMPUBZ128rmik, 0 },
+ { X86::VPCMPUBZ256rrik, X86::VPCMPUBZ256rmik, 0 },
+ { X86::VPCMPUBZrrik, X86::VPCMPUBZrmik, 0 },
+ { X86::VPCMPUDZ128rrik, X86::VPCMPUDZ128rmik, 0 },
+ { X86::VPCMPUDZ256rrik, X86::VPCMPUDZ256rmik, 0 },
+ { X86::VPCMPUDZrrik, X86::VPCMPUDZrmik, 0 },
+ { X86::VPCMPUQZ128rrik, X86::VPCMPUQZ128rmik, 0 },
+ { X86::VPCMPUQZ256rrik, X86::VPCMPUQZ256rmik, 0 },
+ { X86::VPCMPUQZrrik, X86::VPCMPUQZrmik, 0 },
+ { X86::VPCMPUWZ128rrik, X86::VPCMPUWZ128rmik, 0 },
+ { X86::VPCMPUWZ256rrik, X86::VPCMPUWZ256rmik, 0 },
+ { X86::VPCMPUWZrrik, X86::VPCMPUWZrmik, 0 },
+ { X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 },
+ { X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 },
+ { X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
@@ -5136,20 +5230,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return nullptr;
}
}
- case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
- case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
- case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
- case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
- case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
- case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
- case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
- case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
- case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
- case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
- case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
- case X86::VPCMPWZrri: case X86::VPCMPUWZrri: {
+ case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
+ case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
+ case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
+ case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
+ case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
+ case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
+ case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
+ case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
+ case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
+ case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
+ case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
+ case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
+ case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
+ case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
+ case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
+ case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
+ case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
// Flip comparison mode immediate (if necessary).
- unsigned Imm = MI.getOperand(3).getImm() & 0x7;
+ unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
switch (Imm) {
default: llvm_unreachable("Unreachable!");
case 0x01: Imm = 0x06; break; // LT -> NLE
@@ -5163,7 +5269,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
break;
}
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.getOperand(3).setImm(Imm);
+ WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
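
Why commuting these compares must flip the immediate (a sketch): the predicate
applies to the source operands in order, so swapping the sources replaces the
relation with its converse, e.g. LT (0x01) becomes NLE (0x06).

  #include <cassert>
  bool lt(int a, int b) { return a < b; }      // predicate 0x01 (LT)
  bool nle(int a, int b) { return !(a <= b); } // predicate 0x06 (NLE)
  void check(int a, int b) { assert(lt(a, b) == nle(b, a)); }
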
diff --git a/lib/Testing/CMakeLists.txt b/lib/Testing/CMakeLists.txt
new file mode 100644
index 000000000000..fc23e64eeb7a
--- /dev/null
+++ b/lib/Testing/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(Support)
diff --git a/lib/Testing/LLVMBuild.txt b/lib/Testing/LLVMBuild.txt
new file mode 100644
index 000000000000..cdf83736298e
--- /dev/null
+++ b/lib/Testing/LLVMBuild.txt
@@ -0,0 +1,19 @@
+;===- ./lib/Testing/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Support
diff --git a/lib/Testing/Support/CMakeLists.txt b/lib/Testing/Support/CMakeLists.txt
new file mode 100644
index 000000000000..fa8dfe59c8bd
--- /dev/null
+++ b/lib/Testing/Support/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_llvm_library(LLVMTestingSupport
+ Error.cpp
+
+ BUILDTREE_ONLY
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Testing/Support
+ )
+
+include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include)
+include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include)
+target_link_libraries(LLVMTestingSupport PRIVATE gtest)
\ No newline at end of file
diff --git a/lib/Testing/Support/Error.cpp b/lib/Testing/Support/Error.cpp
new file mode 100644
index 000000000000..ce0da44da408
--- /dev/null
+++ b/lib/Testing/Support/Error.cpp
@@ -0,0 +1,22 @@
+//===- llvm/Testing/Support/Error.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Testing/Support/Error.h"
+
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+llvm::detail::ErrorHolder llvm::detail::TakeError(llvm::Error Err) {
+ bool Succeeded = !static_cast<bool>(Err);
+ std::string Message;
+ if (!Succeeded)
+ Message = toString(std::move(Err));
+ return {Succeeded, Message};
+}
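
A hedged usage sketch for the helper above: TakeError backs the Succeeded()/Failed() matchers declared in llvm/Testing/Support/Error.h, so unit tests can assert directly on llvm::Error values. The test below is illustrative, not taken from the tree.

#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"

using namespace llvm;

TEST(ErrorMatchers, SucceededAndFailed) {
  // A success value satisfies Succeeded().
  EXPECT_THAT_ERROR(Error::success(), Succeeded());
  // A real error satisfies Failed(); its message is consumed by
  // TakeError via toString().
  EXPECT_THAT_ERROR(
      make_error<StringError>("boom", inconvertibleErrorCode()), Failed());
}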
diff --git a/lib/Testing/Support/LLVMBuild.txt b/lib/Testing/Support/LLVMBuild.txt
new file mode 100644
index 000000000000..40853e8172d5
--- /dev/null
+++ b/lib/Testing/Support/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./Testing/Support/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = TestingSupport
+parent = Libraries
+required_libraries = Support
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index 1b111de06157..d94aa5da8560 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -95,6 +95,17 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
}
}
+ NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
+ if (CfiFunctionsMD) {
+ for (auto Func : CfiFunctionsMD->operands()) {
+ assert(Func->getNumOperands() >= 2);
+ for (unsigned I = 2; I < Func->getNumOperands(); ++I)
+ if (ConstantInt *TypeId =
+ extractNumericTypeId(cast<MDNode>(Func->getOperand(I).get())))
+ TypeIds.insert(TypeId->getZExtValue());
+ }
+ }
+
LLVMContext &Ctx = M.getContext();
Constant *C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index c0dfeede05c5..ad89e40661c6 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -523,40 +523,47 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (!Callee || Callee->isDeclaration())
continue;
- // If this call site is dead and it is to a readonly function, we should
- // just delete the call instead of trying to inline it, regardless of
- // size. This happens because IPSCCP propagates the result out of the
- // call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
- DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction()
- << "\n");
- // Update the call graph by deleting the edge from Callee to Caller.
- CG[Caller]->removeCallEdgeFor(CS);
- CS.getInstruction()->eraseFromParent();
- ++NumCallsDeleted;
- } else {
+ Instruction *Instr = CS.getInstruction();
+
+ bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
+
+ int InlineHistoryID;
+ if (!IsTriviallyDead) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
- int InlineHistoryID = CallSites[CSi].second;
+ InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
+ }
+ // FIXME for new PM: because of the old PM we currently generate ORE and
+ // in turn BFI on demand. With the new PM, the ORE dependency should
+ // just become a regular analysis dependency.
+ OptimizationRemarkEmitter ORE(Caller);
+
+ // Check the inline policy first; if it declines this call site, leave
+ // it alone (even when the call is trivially dead).
+ if (!shouldInline(CS, GetInlineCost, ORE))
+ continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (IsTriviallyDead) {
+ DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ Instr->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
// Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ DebugLoc DLoc = Instr->getDebugLoc();
BasicBlock *Block = CS.getParent();
- // FIXME for new PM: because of the old PM we currently generate ORE and
- // in turn BFI on demand. With the new PM, the ORE dependency should
- // just become a regular analysis dependency.
- OptimizationRemarkEmitter ORE(Caller);
-
- // If the policy determines that we should inline this function,
- // try to do so.
- if (!shouldInline(CS, GetInlineCost, ORE))
- continue;
// Attempt to inline the function.
using namespace ore;
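
In compressed form, the per-call-site flow after this reordering looks roughly as follows (a hedged sketch; the helper names are shorthand for the calls above, not real API):

// dead = isInstructionTriviallyDead(Instr, &TLI);
// if (!dead && InlineHistoryIncludes(Callee, ...)) continue;
// if (!shouldInline(CS, GetInlineCost, ORE)) continue; // policy now runs
//                                                      // before deletion
// if (dead) { removeCallEdgeFor(CS); eraseCall(); }    // dead calls die
// else      { attemptToInline(CS); }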
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 90896d285f5a..b406c22c69d7 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -206,17 +207,26 @@ struct ByteArrayInfo {
class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
GlobalObject *GO;
size_t NTypes;
+ // For functions: true if this is a definition (either in the merged module or
+ // in one of the thinlto modules).
+ bool IsDefinition;
+ // For functions: true if this function is either defined or used in a thinlto
+ // module and its jumptable entry needs to be exported to thinlto backends.
+ bool IsExported;
friend TrailingObjects;
size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
public:
static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
+ bool IsDefinition, bool IsExported,
ArrayRef<MDNode *> Types) {
auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
GTM->GO = GO;
GTM->NTypes = Types.size();
+ GTM->IsDefinition = IsDefinition;
+ GTM->IsExported = IsExported;
std::uninitialized_copy(Types.begin(), Types.end(),
GTM->getTrailingObjects<MDNode *>());
return GTM;
@@ -224,6 +234,12 @@ public:
GlobalObject *getGlobal() const {
return GO;
}
+ bool isDefinition() const {
+ return IsDefinition;
+ }
+ bool isExported() const {
+ return IsExported;
+ }
ArrayRef<MDNode *> types() const {
return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes);
}
@@ -294,6 +310,7 @@ class LowerTypeTestsModule {
void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
TypeIdLowering importTypeId(StringRef TypeId);
void importTypeTest(CallInst *CI);
+ void importFunction(Function *F, bool isDefinition);
BitSetInfo
buildBitSet(Metadata *TypeId,
@@ -820,6 +837,41 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
CI->eraseFromParent();
}
+// ThinLTO backend: the function F has a jump table entry; update this module
+// accordingly. isDefinition describes the type of the jump table entry.
+void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
+ assert(F->getType()->getAddressSpace() == 0);
+
+ // Declaration of a local function - nothing to do.
+ if (F->isDeclarationForLinker() && isDefinition)
+ return;
+
+ GlobalValue::VisibilityTypes Visibility = F->getVisibility();
+ std::string Name = F->getName();
+ Function *FDecl;
+
+ if (F->isDeclarationForLinker() && !isDefinition) {
+ // Declaration of an external function.
+ FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
+ Name + ".cfi_jt", &M);
+ FDecl->setVisibility(GlobalValue::HiddenVisibility);
+ } else {
+ // Definition.
+ assert(isDefinition);
+ F->setName(Name + ".cfi");
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
+ Name, &M);
+ FDecl->setVisibility(Visibility);
+ }
+
+ if (F->isWeakForLinker())
+ replaceWeakDeclarationWithJumpTablePtr(F, FDecl);
+ else
+ F->replaceAllUsesWith(FDecl);
+}
+
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
@@ -1143,7 +1195,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// arithmetic that we normally use for globals.
// FIXME: find a better way to represent the jumptable in the IR.
-
assert(!Functions.empty());
// Build a simple layout based on the regular layout of jump tables.
@@ -1167,6 +1218,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// references to the original functions with references to the aliases.
for (unsigned I = 0; I != Functions.size(); ++I) {
Function *F = cast<Function>(Functions[I]->getGlobal());
+ bool IsDefinition = Functions[I]->isDefinition();
Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
ConstantExpr::getInBoundsGetElementPtr(
@@ -1174,7 +1226,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
F->getType());
- if (F->isDeclarationForLinker()) {
+ if (Functions[I]->isExported()) {
+ if (IsDefinition) {
+ ExportSummary->cfiFunctionDefs().insert(F->getName());
+ } else {
+ GlobalAlias *JtAlias = GlobalAlias::create(
+ F->getValueType(), 0, GlobalValue::ExternalLinkage,
+ F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+ JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ ExportSummary->cfiFunctionDecls().insert(F->getName());
+ }
+ }
+ if (!IsDefinition) {
if (F->isWeakForLinker())
replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr);
else
@@ -1182,9 +1245,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
} else {
assert(F->getType()->getAddressSpace() == 0);
- GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0,
- F->getLinkage(), "",
- CombinedGlobalElemPtr, &M);
+ GlobalAlias *FAlias = GlobalAlias::create(
+ F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M);
FAlias->setVisibility(F->getVisibility());
FAlias->takeName(F);
if (FAlias->hasName())
@@ -1353,15 +1415,37 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
- if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary)
+ if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary &&
+ !ImportSummary)
return false;
if (ImportSummary) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
- importTypeTest(CI);
+ if (TypeTestFunc) {
+ for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
+ UI != UE;) {
+ auto *CI = cast<CallInst>((*UI++).getUser());
+ importTypeTest(CI);
+ }
+ }
+
+ SmallVector<Function *, 8> Defs;
+ SmallVector<Function *, 8> Decls;
+ for (auto &F : M) {
+ // CFI functions are either external or promoted. A local function may
+ // have the same name, but it's not the one we are looking for.
+ if (F.hasLocalLinkage())
+ continue;
+ if (ImportSummary->cfiFunctionDefs().count(F.getName()))
+ Defs.push_back(&F);
+ else if (ImportSummary->cfiFunctionDecls().count(F.getName()))
+ Decls.push_back(&F);
}
+
+ for (auto F : Defs)
+ importFunction(F, /*isDefinition*/ true);
+ for (auto F : Decls)
+ importFunction(F, /*isDefinition*/ false);
+
return true;
}
@@ -1387,6 +1471,58 @@ bool LowerTypeTestsModule::lower() {
llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo;
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
+
+ struct ExportedFunctionInfo {
+ CfiFunctionLinkage Linkage;
+ MDNode *FuncMD; // {name, linkage, type[, type...]}
+ };
+ DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
+ if (ExportSummary) {
+ NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
+ if (CfiFunctionsMD) {
+ for (auto FuncMD : CfiFunctionsMD->operands()) {
+ assert(FuncMD->getNumOperands() >= 2);
+ StringRef FunctionName =
+ cast<MDString>(FuncMD->getOperand(0))->getString();
+ if (!ExportSummary->isGUIDLive(GlobalValue::getGUID(
+ GlobalValue::dropLLVMManglingEscape(FunctionName))))
+ continue;
+ CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
+ cast<ConstantAsMetadata>(FuncMD->getOperand(1))
+ ->getValue()
+ ->getUniqueInteger()
+ .getZExtValue());
+ auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
+ if (!P.second && P.first->second.Linkage != CFL_Definition)
+ P.first->second = {Linkage, FuncMD};
+ }
+
+ for (const auto &P : ExportedFunctions) {
+ StringRef FunctionName = P.first;
+ CfiFunctionLinkage Linkage = P.second.Linkage;
+ MDNode *FuncMD = P.second.FuncMD;
+ Function *F = M.getFunction(FunctionName);
+ if (!F)
+ F = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalVariable::ExternalLinkage, FunctionName, &M);
+
+ if (Linkage == CFL_Definition)
+ F->eraseMetadata(LLVMContext::MD_type);
+
+ if (F->isDeclaration()) {
+ if (Linkage == CFL_WeakDeclaration)
+ F->setLinkage(GlobalValue::ExternalWeakLinkage);
+
+ SmallVector<MDNode *, 2> Types;
+ for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
+ F->addMetadata(LLVMContext::MD_type,
+ *cast<MDNode>(FuncMD->getOperand(I).get()));
+ }
+ }
+ }
+ }
+
for (GlobalObject &GO : M.global_objects()) {
if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
continue;
@@ -1396,7 +1532,15 @@ bool LowerTypeTestsModule::lower() {
if (Types.empty())
continue;
- auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types);
+ bool IsDefinition = !GO.isDeclarationForLinker();
+ bool IsExported = false;
+ if (isa<Function>(GO) && ExportedFunctions.count(GO.getName())) {
+ IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition;
+ IsExported = true;
+ }
+
+ auto *GTM =
+ GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types);
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)];
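
A hedged summary of the naming scheme that importFunction (earlier in this diff) and the export path above agree on; the function name f is invented:

// Exporting module:  the jump table entry for f is published as the
//                     hidden alias f.cfi_jt (declarations) or recorded
//                     in cfiFunctionDefs() (definitions).
// Importing backend:  a definition of f is renamed f.cfi and a fresh
//                     declaration f stands in for the jump table entry;
//                     for a declaration, uses of f are rewired to the
//                     hidden declaration f.cfi_jt.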
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index ea805efc66b7..8840435af642 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -103,6 +103,35 @@ struct PartialInlinerImpl {
bool run(Module &M);
Function *unswitchFunction(Function *F);
+ // This class speculatively clones the function to be partially inlined.
+ // At the end of partial inlining, the remaining callsites to the cloned
+ // function that are not partially inlined will be fixed up to reference
+ // the original function, and the cloned function will be erased.
+ struct FunctionCloner {
+ FunctionCloner(Function *F, FunctionOutliningInfo *OI);
+ ~FunctionCloner();
+
+ // Prepare for function outlining: make sure there is only
+ // one incoming edge from the extracted/outlined region to
+ // the return block.
+ void NormalizeReturnBlock();
+
+ // Do function outlining:
+ Function *doFunctionOutlining();
+
+ Function *OrigFunc = nullptr;
+ Function *ClonedFunc = nullptr;
+ Function *OutlinedFunc = nullptr;
+ BasicBlock *OutliningCallBB = nullptr;
+ // ClonedFunc is inlined in one of its callers after function
+ // outlining.
+ bool IsFunctionInlined = false;
+ // The cost of the region to be outlined.
+ int OutlinedRegionCost = 0;
+ std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
+ };
+
private:
int NumPartialInlining = 0;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
@@ -114,27 +143,18 @@ private:
// The result is no larger than 1 and is represented using BP.
// (Note that the outlined region's 'head' block can only have incoming
// edges from the guarding entry blocks).
- BranchProbability getOutliningCallBBRelativeFreq(Function *F,
- FunctionOutliningInfo *OI,
- Function *DuplicateFunction,
- BlockFrequencyInfo *BFI,
- BasicBlock *OutliningCallBB);
+ BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
// Return true if the callee of CS should be partially inlined with
// profit.
- bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,
- BlockFrequencyInfo *CalleeBFI,
- BasicBlock *OutliningCallBB,
- int OutliningCallOverhead,
+ bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
+ BlockFrequency WeightedOutliningRcost,
OptimizationRemarkEmitter &ORE);
// Try to inline DuplicateFunction (cloned from F with call to
// the OutlinedFunction into its callers. Return true
// if there is any successful inlining.
- bool tryPartialInline(Function *DuplicateFunction,
- Function *F, /*orignal function */
- FunctionOutliningInfo *OI, Function *OutlinedFunction,
- BlockFrequencyInfo *CalleeBFI);
+ bool tryPartialInline(FunctionCloner &Cloner);
// Compute the mapping from use site of DuplicationFunction to the enclosing
// BB's profile count.
@@ -146,7 +166,7 @@ private:
NumPartialInlining >= MaxNumPartialInlining);
}
- CallSite getCallSite(User *U) {
+ static CallSite getCallSite(User *U) {
CallSite CS;
if (CallInst *CI = dyn_cast<CallInst>(U))
CS = CallSite(CI);
@@ -157,7 +177,7 @@ private:
return CS;
}
- CallSite getOneCallSiteTo(Function *F) {
+ static CallSite getOneCallSiteTo(Function *F) {
User *User = *F->user_begin();
return getCallSite(User);
}
@@ -171,20 +191,15 @@ private:
// Returns the costs associated with function outlining:
// - The first value is the non-weighted runtime cost for making the call
- // to the outlined function 'OutlinedFunction', including the addtional
- // setup cost in the outlined function itself;
+ // to the outlined function, including the additional setup cost in the
+ // outlined function itself;
// - The second value is the estimated size of the new call sequence in
- // basic block 'OutliningCallBB';
- // - The third value is the estimated size of the original code from
- // function 'F' that is extracted into the outlined function.
- std::tuple<int, int, int>
- computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,
- Function *OutlinedFunction,
- BasicBlock *OutliningCallBB);
+ // basic block Cloner.OutliningCallBB;
+ std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
// approximate both the size and runtime cost (Note that in the current
// inline cost analysis, there is no clear distinction there either).
- int computeBBInlineCost(BasicBlock *BB);
+ static int computeBBInlineCost(BasicBlock *BB);
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
@@ -396,19 +411,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
return false;
}
-BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
- Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
- BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
+BranchProbability
+PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
auto EntryFreq =
- BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
- auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
+ Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
+ auto OutliningCallFreq =
+ Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
auto OutlineRegionRelFreq =
BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
EntryFreq.getFrequency());
- if (hasProfileData(F, OI))
+ if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be conservative in
@@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
}
bool PartialInlinerImpl::shouldPartialInline(
- CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,
- BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
- int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
+ CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
+ OptimizationRemarkEmitter &ORE) {
+
using namespace ore;
if (SkipCostAnalysis)
return true;
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
+ assert(Callee == Cloner.ClonedFunc);
+
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
@@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialInline(
if (IC.isAlways()) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
- << NV("Callee", F)
+ << NV("Callee", Cloner.OrigFunc)
<< " should always be fully inlined, not partially");
return false;
}
if (IC.isNever()) {
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", F) << " not partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller)
<< " because it should never be inlined (cost=never)");
return false;
@@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialInline(
if (!IC) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", F) << " not partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
}
const DataLayout &DL = Caller->getParent()->getDataLayout();
+
// The savings of eliminating the call:
int NonWeightedSavings = getCallsiteCost(CS, DL);
BlockFrequency NormWeightedSavings(NonWeightedSavings);
- auto RelativeFreq =
- getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB);
- auto NormWeightedRcost =
- BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
-
// Weighted saving is smaller than weighted cost, return false
- if (NormWeightedSavings < NormWeightedRcost) {
+ if (NormWeightedSavings < WeightedOutliningRcost) {
ORE.emit(
OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
- << NV("Callee", F) << " not partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " runtime overhead (overhead="
- << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
+ << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
<< ", savings="
<< NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
<< " of making the outlined call is too high");
@@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialInline(
}
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
- << NV("Callee", F) << " can be partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
<< " (threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
@@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
return InlineCost;
}
-std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
- Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
- BasicBlock *OutliningCallBB) {
- // First compute the cost of the outlined region 'OI' in the original
- // function 'F'.
- // FIXME: The code extractor (outliner) can now do code sinking/hoisting
- // to reduce outlining cost. The hoisted/sunk code currently do not
- // incur any runtime cost so it is still OK to compare the outlined
- // function cost with the outlined region in the original function.
- // If this ever changes, we will need to introduce new extractor api
- // to pass the information.
- int OutlinedRegionCost = 0;
- for (BasicBlock &BB : *F) {
- if (&BB != OI->ReturnBlock &&
- // Assuming Entry set is small -- do a linear search here:
- std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
- OI->Entries.end()) {
- OutlinedRegionCost += computeBBInlineCost(&BB);
- }
- }
+std::tuple<int, int>
+PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
// Now compute the cost of the call sequence to the outlined function
// 'OutlinedFunction' in BB 'OutliningCallBB':
- int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
+ int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
// Now compute the cost of the extracted/outlined function itself:
int OutlinedFunctionCost = 0;
- for (BasicBlock &BB : *OutlinedFunction) {
+ for (BasicBlock &BB : *Cloner.OutlinedFunc) {
OutlinedFunctionCost += computeBBInlineCost(&BB);
}
- assert(OutlinedFunctionCost >= OutlinedRegionCost &&
+ assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
// The code extractor introduces a new root and exit stub blocks with
// additional unconditional branches. Those branches will be eliminated
// later with bb layout. The cost should be adjusted accordingly:
OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
- int OutliningRuntimeOverhead = OutliningFuncCallCost +
- (OutlinedFunctionCost - OutlinedRegionCost) +
- ExtraOutliningPenalty;
+ int OutliningRuntimeOverhead =
+ OutliningFuncCallCost +
+ (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
+ ExtraOutliningPenalty;
- return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
- OutlinedRegionCost);
+ return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
}
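
A worked instance of the overhead model above, with invented costs:

// Call sequence cost 25, outlined function cost 120 (after the
// 2 * InstrCost adjustment), outlined region cost 100, penalty 10:
//   OutliningRuntimeOverhead = 25 + (120 - 100) + 10 = 55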
// Create the callsite to profile count map which is
@@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
}
}
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
-
- if (F->hasAddressTaken())
- return nullptr;
-
- // Let inliner handle it
- if (F->hasFnAttribute(Attribute::AlwaysInline))
- return nullptr;
-
- if (F->hasFnAttribute(Attribute::NoInline))
- return nullptr;
-
- if (PSI->isFunctionEntryCold(F))
- return nullptr;
-
- if (F->user_begin() == F->user_end())
- return nullptr;
-
- std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
-
- if (!OI)
- return nullptr;
+PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
+ FunctionOutliningInfo *OI)
+ : OrigFunc(F) {
+ ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
- Function *DuplicateFunction = CloneFunction(F, VMap);
- BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
- BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
- DenseSet<BasicBlock *> NewEntries;
+ ClonedFunc = CloneFunction(F, VMap);
+
+ ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
+ ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
for (BasicBlock *BB : OI->Entries) {
- NewEntries.insert(cast<BasicBlock>(VMap[BB]));
+ ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
+ }
+ for (BasicBlock *E : OI->ReturnBlockPreds) {
+ BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+ ClonedOI->ReturnBlockPreds.push_back(NewE);
}
-
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
- F->replaceAllUsesWith(DuplicateFunction);
+ F->replaceAllUsesWith(ClonedFunc);
+}
+
+void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
auto getFirstPHI = [](BasicBlock *BB) {
BasicBlock::iterator I = BB->begin();
@@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
}
return FirstPhi;
};
+
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
// of which will go outside.
- BasicBlock *PreReturn = NewReturnBlock;
+ BasicBlock *PreReturn = ClonedOI->ReturnBlock;
// only split block when necessary:
PHINode *FirstPhi = getFirstPHI(PreReturn);
- unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
+ unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
+
+ if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
+ return;
+
auto IsTrivialPhi = [](PHINode *PN) -> Value * {
Value *CommonValue = PN->getIncomingValue(0);
if (all_of(PN->incoming_values(),
@@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
return nullptr;
};
- if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
-
- NewReturnBlock = NewReturnBlock->splitBasicBlock(
- NewReturnBlock->getFirstNonPHI()->getIterator());
- BasicBlock::iterator I = PreReturn->begin();
- Instruction *Ins = &NewReturnBlock->front();
- SmallVector<Instruction *, 4> DeadPhis;
- while (I != PreReturn->end()) {
- PHINode *OldPhi = dyn_cast<PHINode>(I);
- if (!OldPhi)
- break;
-
- PHINode *RetPhi =
- PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
- OldPhi->replaceAllUsesWith(RetPhi);
- Ins = NewReturnBlock->getFirstNonPHI();
+ ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
+ ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
+ BasicBlock::iterator I = PreReturn->begin();
+ Instruction *Ins = &ClonedOI->ReturnBlock->front();
+ SmallVector<Instruction *, 4> DeadPhis;
+ while (I != PreReturn->end()) {
+ PHINode *OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi)
+ break;
- RetPhi->addIncoming(&*I, PreReturn);
- for (BasicBlock *E : OI->ReturnBlockPreds) {
- BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
- RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
- OldPhi->removeIncomingValue(NewE);
- }
+ PHINode *RetPhi =
+ PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+ OldPhi->replaceAllUsesWith(RetPhi);
+ Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
- // After incoming values splitting, the old phi may become trivial.
- // Keeping the trivial phi can introduce definition inside the outline
- // region which is live-out, causing necessary overhead (load, store
- // arg passing etc).
- if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
- OldPhi->replaceAllUsesWith(OldPhiVal);
- DeadPhis.push_back(OldPhi);
- }
-
- ++I;
+ RetPhi->addIncoming(&*I, PreReturn);
+ for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
+ RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
+ OldPhi->removeIncomingValue(E);
}
+ // After splitting the incoming values, the old phi may become trivial.
+ // Keeping the trivial phi can introduce a definition inside the outlined
+ // region which is live-out, causing unnecessary overhead (load, store,
+ // arg passing, etc.).
+ if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+ OldPhi->replaceAllUsesWith(OldPhiVal);
+ DeadPhis.push_back(OldPhi);
+ }
+ ++I;
+ }
for (auto *DP : DeadPhis)
DP->eraseFromParent();
- for (auto E : OI->ReturnBlockPreds) {
- BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
- NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+ for (auto E : ClonedOI->ReturnBlockPreds) {
+ E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
}
- }
+}
+Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
// Returns true if the block is to be partial inlined into the caller
// (i.e. not to be extracted to the out of line function)
- auto ToBeInlined = [&](BasicBlock *BB) {
- return BB == NewReturnBlock || NewEntries.count(BB);
+ auto ToBeInlined = [&, this](BasicBlock *BB) {
+ return BB == ClonedOI->ReturnBlock ||
+ (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
+ ClonedOI->Entries.end());
};
+
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
- ToExtract.push_back(NewNonReturnBlock);
- for (BasicBlock &BB : *DuplicateFunction)
- if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
+ ToExtract.push_back(ClonedOI->NonReturnBlock);
+ OutlinedRegionCost +=
+ PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+ for (BasicBlock &BB : *ClonedFunc)
+ if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
ToExtract.push_back(&BB);
+ // FIXME: the code extractor may hoist/sink more code
+ // into the outlined function, which may make the outlining
+ // overhead (the difference between the outlined function cost
+ // and OutlinedRegionCost) look larger.
+ OutlinedRegionCost += computeBBInlineCost(&BB);
+ }
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
- DT.recalculate(*DuplicateFunction);
+ DT.recalculate(*ClonedFunc);
// Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
LoopInfo LI(DT);
- BranchProbabilityInfo BPI(*DuplicateFunction, LI);
- BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
+ BranchProbabilityInfo BPI(*ClonedFunc, LI);
+ ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
// Extract the body of the if.
- Function *OutlinedFunction =
- CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
- .extractCodeRegion();
+ OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
+ ClonedFuncBFI.get(), &BPI)
+ .extractCodeRegion();
+
+ if (OutlinedFunc) {
+ OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
+ .getInstruction()
+ ->getParent();
+ assert(OutliningCallBB->getParent() == ClonedFunc);
+ }
- bool AnyInline =
- tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);
+ return OutlinedFunc;
+}
+PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
- DuplicateFunction->replaceAllUsesWith(F);
- DuplicateFunction->eraseFromParent();
+ ClonedFunc->replaceAllUsesWith(OrigFunc);
+ ClonedFunc->eraseFromParent();
+ if (!IsFunctionInlined) {
+ // Remove the function that is speculatively created if there is no
+ // reference.
+ if (OutlinedFunc)
+ OutlinedFunc->eraseFromParent();
+ }
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+ if (F->hasAddressTaken())
+ return nullptr;
+
+ // Let inliner handle it
+ if (F->hasFnAttribute(Attribute::AlwaysInline))
+ return nullptr;
+
+ if (F->hasFnAttribute(Attribute::NoInline))
+ return nullptr;
+
+ if (PSI->isFunctionEntryCold(F))
+ return nullptr;
+
+ if (F->user_begin() == F->user_end())
+ return nullptr;
+
+ std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
+
+ if (!OI)
+ return nullptr;
+
+ FunctionCloner Cloner(F, OI.get());
+ Cloner.NormalizeReturnBlock();
+ Function *OutlinedFunction = Cloner.doFunctionOutlining();
+
+ bool AnyInline = tryPartialInline(Cloner);
if (AnyInline)
return OutlinedFunction;
- // Remove the function that is speculatively created:
- if (OutlinedFunction)
- OutlinedFunction->eraseFromParent();
-
return nullptr;
}
-bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
- Function *F,
- FunctionOutliningInfo *OI,
- Function *OutlinedFunction,
- BlockFrequencyInfo *CalleeBFI) {
- if (OutlinedFunction == nullptr)
- return false;
-
+bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
int NonWeightedRcost;
int SizeCost;
- int OutlinedRegionSizeCost;
- auto OutliningCallBB =
- getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
+ if (Cloner.OutlinedFunc == nullptr)
+ return false;
+
+ std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
- std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
- computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
+ auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
+ auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
  // If the call sequence to the outlined function is larger than the original
  // outlined region size, it does not increase the chances of inlining
- // 'F' with outlining (The inliner usies the size increase to model the
- // the cost of inlining a callee).
- if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
- OptimizationRemarkEmitter ORE(F);
+ // the function with outlining (the inliner uses the size increase to
+ // model the cost of inlining a callee).
+ if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
+ OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
DebugLoc DLoc;
BasicBlock *Block;
- std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
+ std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
DLoc, Block)
- << ore::NV("Function", F)
+ << ore::NV("Function", Cloner.OrigFunc)
<< " not partially inlined into callers (Original Size = "
- << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost)
+ << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
<< ", Size of call sequence to outlined function = "
<< ore::NV("NewSize", SizeCost) << ")");
return false;
}
- assert(F->user_begin() == F->user_end() &&
+ assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
"F's users should all be replaced!");
- std::vector<User *> Users(DuplicateFunction->user_begin(),
- DuplicateFunction->user_end());
+
+ std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
+ Cloner.ClonedFunc->user_end());
DenseMap<User *, uint64_t> CallSiteToProfCountMap;
- if (F->getEntryCount())
- computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap);
+ if (Cloner.OrigFunc->getEntryCount())
+ computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
- auto CalleeEntryCount = F->getEntryCount();
+ auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
+
bool AnyInline = false;
for (User *User : Users) {
CallSite CS = getCallSite(User);
@@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
OptimizationRemarkEmitter ORE(CS.getCaller());
- if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
- NonWeightedRcost, ORE))
+ if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
continue;
ORE.emit(
OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
- << ore::NV("Callee", F) << " partially inlined into "
+ << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
<< ore::NV("Caller", CS.getCaller()));
InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
@@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
NumPartialInlined++;
}
- if (AnyInline && CalleeEntryCount)
- F->setEntryCount(CalleeEntryCountV);
+ if (AnyInline) {
+ Cloner.IsFunctionInlined = true;
+ if (CalleeEntryCount)
+ Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
+ }
return AnyInline;
}
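
Putting the pieces together, a hedged sketch of the FunctionCloner lifecycle as unswitchFunction now drives it; the comments summarize the destructor behavior defined above:

{
  FunctionCloner Cloner(F, OI.get());        // clone F, RAUW F -> clone
  Cloner.NormalizeReturnBlock();             // one edge into return block
  Function *Outlined = Cloner.doFunctionOutlining();
  bool AnyInline = tryPartialInline(Cloner); // may set IsFunctionInlined
  // ~FunctionCloner(): RAUW clone -> F, erase the clone, and erase
  // Outlined as well if nothing was actually inlined.
}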
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 16fba32e9805..4bc64ab698ff 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -141,6 +141,10 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
+static cl::opt<bool> EnableEarlyCSEMemSSA(
+ "enable-earlycse-memssa", cl::init(false), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)"));
+
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass (default = off)"));
@@ -308,7 +312,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
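
For context, a hedged sketch of the hook this flag drives; the boolean parameter on createEarlyCSEPass selects the MemorySSA-backed variant, so the option only changes which form the default pipeline instantiates:

// Assumed signature: FunctionPass *createEarlyCSEPass(bool UseMemorySSA);
FunctionPass *EarlyCSEWithMSSA = createEarlyCSEPass(/*UseMemorySSA=*/true);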
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index a7bcc7cc5532..802f470ffe1f 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -32,7 +32,8 @@ namespace {
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
-void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
+void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
+ SetVector<GlobalValue *> &PromoteExtra) {
DenseMap<const Comdat *, Comdat *> RenamedComdats;
for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
@@ -40,7 +41,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
auto Name = ExportGV.getName();
GlobalValue *ImportGV = ImportM.getNamedValue(Name);
- if (!ImportGV || ImportGV->use_empty())
+ if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV))
continue;
std::string NewName = (Name + ModuleId).str();
@@ -53,8 +54,10 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
ExportGV.setVisibility(GlobalValue::HiddenVisibility);
- ImportGV->setName(NewName);
- ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ if (ImportGV) {
+ ImportGV->setName(NewName);
+ ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ }
}
if (!RenamedComdats.empty())
@@ -296,6 +299,11 @@ void splitAndWriteThinLTOBitcode(
F.setComdat(nullptr);
}
+ SetVector<GlobalValue *> CfiFunctions;
+ for (auto &F : M)
+ if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
+ CfiFunctions.insert(&F);
+
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
@@ -308,11 +316,39 @@ void splitAndWriteThinLTOBitcode(
return true;
});
- promoteInternals(*MergedM, M, ModuleId);
- promoteInternals(M, *MergedM, ModuleId);
+ promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
+ promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
+
+ SmallVector<MDNode *, 8> CfiFunctionMDs;
+ for (auto V : CfiFunctions) {
+ Function &F = *cast<Function>(V);
+ SmallVector<MDNode *, 2> Types;
+ F.getMetadata(LLVMContext::MD_type, Types);
+
+ auto &Ctx = MergedM->getContext();
+ SmallVector<Metadata *, 4> Elts;
+ Elts.push_back(MDString::get(Ctx, F.getName()));
+ CfiFunctionLinkage Linkage;
+ if (!F.isDeclarationForLinker())
+ Linkage = CFL_Definition;
+ else if (F.isWeakForLinker())
+ Linkage = CFL_WeakDeclaration;
+ else
+ Linkage = CFL_Declaration;
+ Elts.push_back(ConstantAsMetadata::get(
+ llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
+ for (auto Type : Types)
+ Elts.push_back(Type);
+ CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
+ }
- simplifyExternals(*MergedM);
+ if (!CfiFunctionMDs.empty()) {
+ NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
+ for (auto MD : CfiFunctionMDs)
+ NMD->addOperand(MD);
+ }
+ simplifyExternals(*MergedM);
// FIXME: Try to re-use BSI and PFI from the original module here.
ProfileSummaryInfo PSI(M);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4fe3225a2172..a881bda5ba98 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -763,8 +763,54 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
}
+// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
+Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
+ bool JoinedByAnd,
+ Instruction &CxtI) {
+ ICmpInst::Predicate Pred = LHS->getPredicate();
+ if (Pred != RHS->getPredicate())
+ return nullptr;
+ if (JoinedByAnd && Pred != ICmpInst::ICMP_NE)
+ return nullptr;
+ if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ)
+ return nullptr;
+
+ // TODO support vector splats
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero())
+ return nullptr;
+
+ Value *A, *B, *C, *D;
+ if (match(LHS->getOperand(0), m_And(m_Value(A), m_Value(B))) &&
+ match(RHS->getOperand(0), m_And(m_Value(C), m_Value(D)))) {
+ if (A == D || B == D)
+ std::swap(C, D);
+ if (B == C)
+ std::swap(A, B);
+
+ if (A == C &&
+ isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) &&
+ isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) {
+ Value *Mask = Builder->CreateOr(B, D);
+ Value *Masked = Builder->CreateAnd(A, Mask);
+ auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ return Builder->CreateICmp(NewPred, Masked, Mask);
+ }
+ }
+
+ return nullptr;
+}
+
/// Fold (icmp)&(icmp) if possible.
-Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ Instruction &CxtI) {
+ // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
+ // if K1 and K2 are a one-bit mask.
+ if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI))
+ return V;
+
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -1127,8 +1173,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
if (ICmp0 && ICmp1) {
- Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1)
- : foldOrOfICmps(ICmp0, ICmp1, &I);
+ Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1, I)
+ : foldOrOfICmps(ICmp0, ICmp1, I);
if (Res)
return CastInst::Create(CastOpcode, Res, DestTy);
return nullptr;
@@ -1426,7 +1472,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = foldAndOfICmps(LHS, RHS))
+ if (Value *Res = foldAndOfICmps(LHS, RHS, I))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -1434,18 +1480,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldAndOfICmps(LHS, Cmp))
+ if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldAndOfICmps(LHS, Cmp))
+ if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldAndOfICmps(Cmp, RHS))
+ if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldAndOfICmps(Cmp, RHS))
+ if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
}
}
@@ -1591,41 +1637,16 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
/// Fold (icmp)|(icmp) if possible.
Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
- Instruction *CxtI) {
- ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
-
+ Instruction &CxtI) {
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// if K1 and K2 are a one-bit mask.
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI))
+ return V;
- if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero() &&
- RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) {
-
- BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
- BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
- if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() &&
- LAnd->getOpcode() == Instruction::And &&
- RAnd->getOpcode() == Instruction::And) {
-
- Value *Mask = nullptr;
- Value *Masked = nullptr;
- if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, CxtI) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, CxtI)) {
- Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
- Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
- } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, CxtI) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, CxtI)) {
- Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
- Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
- }
+ ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
- if (Masked)
- return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask);
- }
- }
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
// Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
// --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
@@ -2117,12 +2138,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
+ // FIXME: The two hasOneUse calls here are the same call; maybe we were
+ // supposed to check Op1->operand(0)?
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
return BinaryOperator::CreateOr(Op0, C);
// ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
+ // FIXME: The two hasOneUse calls here are the same call; maybe we were
+ // supposed to check Op0->operand(0)?
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
@@ -2194,7 +2219,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = foldOrOfICmps(LHS, RHS, &I))
+ if (Value *Res = foldOrOfICmps(LHS, RHS, I))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -2202,18 +2227,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
+ if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
+ if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
+ if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
+ if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
}
}
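
To make the new fold concrete, a worked example with K1 = 1 and K2 = 2, both one-bit masks; the helper names are invented and this only illustrates the truth tables, it is not LLVM code:

bool orBefore(unsigned A)  { return (A & 1) == 0 || (A & 2) == 0; }
bool orAfter(unsigned A)   { return (A & 3) != 3; } // same truth table
bool andBefore(unsigned A) { return (A & 1) != 0 && (A & 2) != 0; }
bool andAfter(unsigned A)  { return (A & 3) == 3; } // same truth table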
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d29ed49eca0b..c0830a5d2112 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -94,75 +94,80 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
return ConstantVector::get(BoolVec);
}
-Instruction *
-InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) {
+Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
+ ElementUnorderedAtomicMemCpyInst *AMI) {
// Try to unfold this intrinsic into sequence of explicit atomic loads and
// stores.
// First check that number of elements is compile time constant.
- auto *NumElementsCI = dyn_cast<ConstantInt>(AMI->getNumElements());
- if (!NumElementsCI)
+ auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength());
+ if (!LengthCI)
return nullptr;
// Check that there are not too many elements.
- uint64_t NumElements = NumElementsCI->getZExtValue();
+ uint64_t LengthInBytes = LengthCI->getZExtValue();
+ uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes();
+ uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
return nullptr;
- // Don't unfold into illegal integers
- uint64_t ElementSizeInBytes = AMI->getElementSizeInBytes() * 8;
- if (!getDataLayout().isLegalInteger(ElementSizeInBytes))
- return nullptr;
+ // Only expand if there are elements to copy.
+ if (NumElements > 0) {
+ // Don't unfold into illegal integers
+ uint64_t ElementSizeInBits = ElementSizeInBytes * 8;
+ if (!getDataLayout().isLegalInteger(ElementSizeInBits))
+ return nullptr;
- // Cast source and destination to the correct type. Intrinsic input arguments
- // are usually represented as i8*.
- // Often operands will be explicitly casted to i8* and we can just strip
- // those casts instead of inserting new ones. However it's easier to rely on
- // other InstCombine rules which will cover trivial cases anyway.
- Value *Src = AMI->getRawSource();
- Value *Dst = AMI->getRawDest();
- Type *ElementPointerType = Type::getIntNPtrTy(
- AMI->getContext(), ElementSizeInBytes, Src->getType()->getPointerAddressSpace());
-
- Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
- "memcpy_unfold.src_casted");
- Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
- "memcpy_unfold.dst_casted");
-
- for (uint64_t i = 0; i < NumElements; ++i) {
- // Get current element addresses
- ConstantInt *ElementIdxCI =
- ConstantInt::get(AMI->getContext(), APInt(64, i));
- Value *SrcElementAddr =
- Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
- Value *DstElementAddr =
- Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
-
- // Load from the source. Transfer alignment information and mark load as
- // unordered atomic.
- LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
- Load->setOrdering(AtomicOrdering::Unordered);
- // We know alignment of the first element. It is also guaranteed by the
- // verifier that element size is less or equal than first element alignment
- // and both of this values are powers of two.
- // This means that all subsequent accesses are at least element size
- // aligned.
- // TODO: We can infer better alignment but there is no evidence that this
- // will matter.
- Load->setAlignment(i == 0 ? AMI->getSrcAlignment()
- : AMI->getElementSizeInBytes());
- Load->setDebugLoc(AMI->getDebugLoc());
-
- // Store loaded value via unordered atomic store.
- StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
- Store->setOrdering(AtomicOrdering::Unordered);
- Store->setAlignment(i == 0 ? AMI->getDstAlignment()
- : AMI->getElementSizeInBytes());
- Store->setDebugLoc(AMI->getDebugLoc());
+ // Cast source and destination to the correct type. Intrinsic input
+ // arguments are usually represented as i8*. Often operands will be
+ // explicitly cast to i8*, and we could just strip those casts instead of
+ // inserting new ones. However, it's easier to rely on other InstCombine
+ // rules, which will cover the trivial cases anyway.
+ Value *Src = AMI->getRawSource();
+ Value *Dst = AMI->getRawDest();
+ Type *ElementPointerType =
+ Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
+ Src->getType()->getPointerAddressSpace());
+
+ Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
+ "memcpy_unfold.src_casted");
+ Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
+ "memcpy_unfold.dst_casted");
+
+ for (uint64_t i = 0; i < NumElements; ++i) {
+ // Get current element addresses
+ ConstantInt *ElementIdxCI =
+ ConstantInt::get(AMI->getContext(), APInt(64, i));
+ Value *SrcElementAddr =
+ Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
+ Value *DstElementAddr =
+ Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
+
+ // Load from the source. Transfer alignment information and mark load as
+ // unordered atomic.
+ LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
+ Load->setOrdering(AtomicOrdering::Unordered);
+ // We know the alignment of the first element. It is also guaranteed by
+ // the verifier that the element size is less than or equal to the first
+ // element's alignment and that both of these values are powers of two.
+ // This means that all subsequent accesses are at least element-size
+ // aligned.
+ // TODO: We can infer better alignment but there is no evidence that this
+ // will matter.
+ Load->setAlignment(i == 0 ? AMI->getParamAlignment(1)
+ : ElementSizeInBytes);
+ Load->setDebugLoc(AMI->getDebugLoc());
+
+ // Store loaded value via unordered atomic store.
+ StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
+ Store->setOrdering(AtomicOrdering::Unordered);
+ Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
+ : ElementSizeInBytes);
+ Store->setDebugLoc(AMI->getDebugLoc());
+ }
}
// Set the number of elements of the copy to 0; it will be deleted on the
// next iteration.
- AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType()));
+ AMI->setLength(Constant::getNullValue(LengthCI->getType()));
return AMI;
}
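The unfolding above is a division to obtain the element count followed by one atomic load/store pair per element. A minimal standalone sketch of the resulting shape (an illustration, not LLVM code; std::memory_order_relaxed stands in for LLVM's unordered ordering, which has no exact C++ equivalent):

#include <atomic>
#include <cstddef>

// Models the expansion: once LengthInBytes is a compile-time constant,
// the intrinsic becomes LengthInBytes / sizeof(Elem) separate
// load/store pairs, one per element.
template <typename Elem>
void unfoldedAtomicMemCpy(std::atomic<Elem> *Dst,
                          const std::atomic<Elem> *Src,
                          std::size_t LengthInBytes) {
  std::size_t NumElements = LengthInBytes / sizeof(Elem);
  for (std::size_t I = 0; I != NumElements; ++I) {
    // Per-element load and store, as the transform emits.
    Elem V = Src[I].load(std::memory_order_relaxed);
    Dst[I].store(V, std::memory_order_relaxed);
  }
}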
@@ -1888,12 +1893,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
- if (auto *AMI = dyn_cast<ElementAtomicMemCpyInst>(II)) {
- if (Constant *C = dyn_cast<Constant>(AMI->getNumElements()))
+ if (auto *AMI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(II)) {
+ if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
if (C->isNullValue())
return eraseInstFromFunction(*AMI);
- if (Instruction *I = SimplifyElementAtomicMemCpy(AMI))
+ if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI))
return I;
}
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index fd0a64a5bbb5..1a7db146df42 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -447,12 +447,14 @@ private:
Instruction::CastOps isEliminableCastPair(const CastInst *CI1,
const CastInst *CI2);
- Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
- Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
+ Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
+ bool JoinedByAnd, Instruction &CxtI);
public:
/// \brief Inserts an instruction \p New before instruction \p Old
///
@@ -724,7 +726,8 @@ private:
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
- Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI);
+ Instruction *
+ SimplifyElementUnorderedAtomicMemCpy(ElementUnorderedAtomicMemCpyInst *AMI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 3f2ddcacce2b..8cec865c6422 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -682,11 +682,11 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
}
- if (match(Op0, m_SExt(m_Value(X)))) {
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
// Are we moving the sign bit to the low bit and widening with high zeros?
unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
- if (ShAmt == BitWidth - 1 &&
- (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
+ if (ShAmt == BitWidth - 1) {
// lshr (sext i1 X to iN), N-1 --> zext X to iN
if (SrcTyBitWidth == 1)
return new ZExtInst(X, Ty);
@@ -698,7 +698,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
}
- // TODO: Convert to ashr+zext if the shift equals the extension amount.
+ // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
+ if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) {
+ // The new shift amount can't be more than the narrow source type.
+ unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1);
+ Value *AShr = Builder->CreateAShr(X, NewShAmt);
+ return new ZExtInst(AShr, Ty);
+ }
}
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
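The new ashr+zext fold can be spot-checked exhaustively for the smallest interesting case, M = 8 and N = 32, where ShAmt = 24 and NewShAmt = min(24, 7) = 7. A standalone check (an assumed example, not part of the patch; it relies on >> of a negative signed value being an arithmetic shift, as on mainstream compilers):

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    int8_t Narrow = static_cast<int8_t>(X);
    // lshr (sext i8 X to i32), 24
    uint32_t LShrOfSExt =
        static_cast<uint32_t>(static_cast<int32_t>(Narrow)) >> 24;
    // zext (ashr i8 X, 7) to i32
    uint32_t ZExtOfAShr = static_cast<uint8_t>(Narrow >> 7);
    assert(LShrOfSExt == ZExtOfAShr);
  }
  return 0;
}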
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 7ff69b9eb7f4..f2806e278e6e 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMInstrumentation
Instrumentation.cpp
InstrProfiling.cpp
PGOInstrumentation.cpp
+ PGOMemOPSizeOpt.cpp
SanitizerCoverage.cpp
ThreadSanitizer.cpp
EfficiencySanitizer.cpp
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 96027bc3d0a9..0d308810009d 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -56,8 +56,6 @@ using namespace llvm;
STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
-STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
-STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
// Command line option to disable indirect-call promotion with the default as
// false. This is for debugging purposes.
@@ -111,44 +109,6 @@ static cl::opt<bool>
ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
cl::desc("Dump IR after transformation happens"));
-// The minimum call count to optimize memory intrinsic calls.
-static cl::opt<unsigned>
- MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
- cl::init(1000),
- cl::desc("The minimum count to optimize memory "
- "intrinsic calls"));
-
-// Command line option to disable memory intrinsic optimization. The default is
-// false. This is for debug purpose.
-static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
- cl::Hidden, cl::desc("Disable optimize"));
-
-// The percent threshold to optimize memory intrinsic calls.
-static cl::opt<unsigned>
- MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
- cl::Hidden, cl::ZeroOrMore,
- cl::desc("The percentage threshold for the "
- "memory intrinsic calls optimization"));
-
-// Maximum number of versions for optimizing memory intrinsic call.
-static cl::opt<unsigned>
- MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
- cl::ZeroOrMore,
- cl::desc("The max version for the optimized memory "
- " intrinsic calls"));
-
-// Scale the counts from the annotation using the BB count value.
-static cl::opt<bool>
- MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
- cl::desc("Scale the memop size counts using the basic "
- " block count value"));
-
-// This option sets the rangge of precise profile memop sizes.
-extern cl::opt<std::string> MemOPSizeRange;
-
-// This option sets the value that groups large memop sizes
-extern cl::opt<unsigned> MemOPSizeLarge;
-
namespace {
class PGOIndirectCallPromotionLegacyPass : public ModulePass {
public:
@@ -173,24 +133,6 @@ private:
// the promoted direct call.
bool SamplePGO;
};
-
-class PGOMemOPSizeOptLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
- initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "PGOMemOPSize"; }
-
-private:
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
} // end anonymous namespace
char PGOIndirectCallPromotionLegacyPass::ID = 0;
@@ -204,19 +146,6 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
}
-char PGOMemOPSizeOptLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
- "Optimize memory intrinsic using its size value profile",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
- "Optimize memory intrinsic using its size value profile",
- false, false)
-
-FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
- return new PGOMemOPSizeOptLegacyPass();
-}
-
namespace {
// The class for main data structure to promote indirect calls to conditional
// direct calls.
@@ -749,285 +678,3 @@ PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
return PreservedAnalyses::none();
}
-
-namespace {
-class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
-public:
- MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI)
- : Func(Func), BFI(BFI), Changed(false) {
- ValueDataArray =
- llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
- // Get the MemOPSize range information from option MemOPSizeRange,
- getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
- PreciseRangeLast);
- }
- bool isChanged() const { return Changed; }
- void perform() {
- WorkList.clear();
- visit(Func);
-
- for (auto &MI : WorkList) {
- ++NumOfPGOMemOPAnnotate;
- if (perform(MI)) {
- Changed = true;
- ++NumOfPGOMemOPOpt;
- DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()
- << "is Transformed.\n");
- }
- }
- }
-
- void visitMemIntrinsic(MemIntrinsic &MI) {
- Value *Length = MI.getLength();
- // Not perform on constant length calls.
- if (dyn_cast<ConstantInt>(Length))
- return;
- WorkList.push_back(&MI);
- }
-
-private:
- Function &Func;
- BlockFrequencyInfo &BFI;
- bool Changed;
- std::vector<MemIntrinsic *> WorkList;
- // Start of the previse range.
- int64_t PreciseRangeStart;
- // Last value of the previse range.
- int64_t PreciseRangeLast;
- // The space to read the profile annotation.
- std::unique_ptr<InstrProfValueData[]> ValueDataArray;
- bool perform(MemIntrinsic *MI);
-
- // This kind shows which group the value falls in. For PreciseValue, we have
- // the profile count for that value. LargeGroup groups the values that are in
- // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
- enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
-
- MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
- if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
- return LargeGroup;
- if (Value == PreciseRangeLast + 1)
- return NonLargeGroup;
- return PreciseValue;
- }
-};
-
-static const char *getMIName(const MemIntrinsic *MI) {
- switch (MI->getIntrinsicID()) {
- case Intrinsic::memcpy:
- return "memcpy";
- case Intrinsic::memmove:
- return "memmove";
- case Intrinsic::memset:
- return "memset";
- default:
- return "unknown";
- }
-}
-
-static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
- assert(Count <= TotalCount);
- if (Count < MemOPCountThreshold)
- return false;
- if (Count < TotalCount * MemOPPercentThreshold / 100)
- return false;
- return true;
-}
-
-static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
- uint64_t Denom) {
- if (!MemOPScaleCount)
- return Count;
- bool Overflowed;
- uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
- return ScaleCount / Denom;
-}
-
-bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
- assert(MI);
- if (MI->getIntrinsicID() == Intrinsic::memmove)
- return false;
-
- uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
- uint64_t TotalCount;
- if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
- ValueDataArray.get(), NumVals, TotalCount))
- return false;
-
- uint64_t ActualCount = TotalCount;
- uint64_t SavedTotalCount = TotalCount;
- if (MemOPScaleCount) {
- auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
- if (!BBEdgeCount)
- return false;
- ActualCount = *BBEdgeCount;
- }
-
- ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
- DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount
- << "\n");
- DEBUG(
- for (auto &VD
- : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
-
- if (ActualCount < MemOPCountThreshold)
- return false;
- // Skip if the total value profiled count is 0, in which case we can't
- // scale up the counts properly (and there is no profitable transformation).
- if (TotalCount == 0)
- return false;
-
- TotalCount = ActualCount;
- if (MemOPScaleCount)
- DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
- << " denominator = " << SavedTotalCount << "\n");
-
- // Keeping track of the count of the default case:
- uint64_t RemainCount = TotalCount;
- SmallVector<uint64_t, 16> SizeIds;
- SmallVector<uint64_t, 16> CaseCounts;
- uint64_t MaxCount = 0;
- unsigned Version = 0;
- // Default case is in the front -- save the slot here.
- CaseCounts.push_back(0);
- for (auto &VD : VDs) {
- int64_t V = VD.Value;
- uint64_t C = VD.Count;
- if (MemOPScaleCount)
- C = getScaledCount(C, ActualCount, SavedTotalCount);
-
- // Only care precise value here.
- if (getMemOPSizeKind(V) != PreciseValue)
- continue;
-
- // ValueCounts are sorted on the count. Break at the first un-profitable
- // value.
- if (!isProfitable(C, RemainCount))
- break;
-
- SizeIds.push_back(V);
- CaseCounts.push_back(C);
- if (C > MaxCount)
- MaxCount = C;
-
- assert(RemainCount >= C);
- RemainCount -= C;
-
- if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
- break;
- }
-
- if (Version == 0)
- return false;
-
- CaseCounts[0] = RemainCount;
- if (RemainCount > MaxCount)
- MaxCount = RemainCount;
-
- uint64_t SumForOpt = TotalCount - RemainCount;
-
- DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
- << " Versions (covering " << SumForOpt << " out of "
- << TotalCount << ")\n");
-
- // mem_op(..., size)
- // ==>
- // switch (size) {
- // case s1:
- // mem_op(..., s1);
- // goto merge_bb;
- // case s2:
- // mem_op(..., s2);
- // goto merge_bb;
- // ...
- // default:
- // mem_op(..., size);
- // goto merge_bb;
- // }
- // merge_bb:
-
- BasicBlock *BB = MI->getParent();
- DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
- DEBUG(dbgs() << *BB << "\n");
- auto OrigBBFreq = BFI.getBlockFreq(BB);
-
- BasicBlock *DefaultBB = SplitBlock(BB, MI);
- BasicBlock::iterator It(*MI);
- ++It;
- assert(It != DefaultBB->end());
- BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
- MergeBB->setName("MemOP.Merge");
- BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
- DefaultBB->setName("MemOP.Default");
-
- auto &Ctx = Func.getContext();
- IRBuilder<> IRB(BB);
- BB->getTerminator()->eraseFromParent();
- Value *SizeVar = MI->getLength();
- SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
-
- // Clear the value profile data.
- MI->setMetadata(LLVMContext::MD_prof, nullptr);
-
- DEBUG(dbgs() << "\n\n== Basic Block After==\n");
-
- for (uint64_t SizeId : SizeIds) {
- ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);
- BasicBlock *CaseBB = BasicBlock::Create(
- Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
- Instruction *NewInst = MI->clone();
- // Fix the argument.
- dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId);
- CaseBB->getInstList().push_back(NewInst);
- IRBuilder<> IRBCase(CaseBB);
- IRBCase.CreateBr(MergeBB);
- SI->addCase(CaseSizeId, CaseBB);
- DEBUG(dbgs() << *CaseBB << "\n");
- }
- setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
-
- DEBUG(dbgs() << *BB << "\n");
- DEBUG(dbgs() << *DefaultBB << "\n");
- DEBUG(dbgs() << *MergeBB << "\n");
-
- emitOptimizationRemark(Func.getContext(), "memop-opt", Func,
- MI->getDebugLoc(),
- Twine("optimize ") + getMIName(MI) + " with count " +
- Twine(SumForOpt) + " out of " + Twine(TotalCount) +
- " for " + Twine(Version) + " versions");
-
- return true;
-}
-} // namespace
-
-static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {
- if (DisableMemOPOPT)
- return false;
-
- if (F.hasFnAttribute(Attribute::OptimizeForSize))
- return false;
- MemOPSizeOpt MemOPSizeOpt(F, BFI);
- MemOPSizeOpt.perform();
- return MemOPSizeOpt.isChanged();
-}
-
-bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
- BlockFrequencyInfo &BFI =
- getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- return PGOMemOPSizeOptImpl(F, BFI);
-}
-
-namespace llvm {
-char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
-
-PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
- FunctionAnalysisManager &FAM) {
- auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
- bool Changed = PGOMemOPSizeOptImpl(F, BFI);
- if (!Changed)
- return PreservedAnalyses::all();
- auto PA = PreservedAnalyses();
- PA.preserve<GlobalsAA>();
- return PA;
-}
-} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index f83c930ca61b..37f88d5f95f1 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -343,14 +343,24 @@ static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
static inline bool shouldRecordFunctionAddr(Function *F) {
// Check the linkage
+ bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
- !F->hasAvailableExternallyLinkage())
+ !HasAvailableExternallyLinkage)
return true;
+
+ // A function marked 'alwaysinline' with available_externally linkage can't
+ // have its address taken. Doing so would create an undefined external ref to
+ // the function, which would fail to link.
+ if (HasAvailableExternallyLinkage &&
+ F->hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
// Prohibit function address recording if the function is both internal and
// COMDAT. This avoids the profile data variable referencing internal symbols
// in COMDAT.
if (F->hasLocalLinkage() && F->hasComdat())
return false;
+
// Check the uses of this function for anything other than direct calls or
// invokes to it.
// Inline virtual functions have linkOnceODR linkage. When a key method
// exists, the vtable will only be emitted in the TU where the key method
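The order of these checks matters: the alwaysinline rejection must run after the early return for strong linkage, since only available_externally definitions are affected. A boolean restatement of the logic (a sketch only; the use-based checks that the diff context cuts off are assumed to permit recording):

// Names are illustrative, not the real API.
bool shouldRecordFunctionAddrSketch(bool LinkOnce, bool Local,
                                    bool AvailableExternally,
                                    bool AlwaysInline, bool InComdat) {
  // Strong definitions can always have their address recorded.
  if (!LinkOnce && !Local && !AvailableExternally)
    return true;
  // available_externally + alwaysinline: taking the address would leave
  // an undefined external reference and fail to link.
  if (AvailableExternally && AlwaysInline)
    return false;
  // Internal symbols must not be referenced from COMDAT profile data.
  if (Local && InComdat)
    return false;
  return true; // remaining use-based checks elided
}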
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
new file mode 100644
index 000000000000..0bc9ddfbe4d3
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -0,0 +1,419 @@
+//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that optimizes memory intrinsics
+// such as memcpy using the size value profile. When memory intrinsic size
+// value profile metadata is available, a single memory intrinsic is expanded
+// to a sequence of guarded specialized versions that are called with the
+// hottest size(s), for later expansion into more optimal inline sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-memop-opt"
+
+STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
+STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
+
+// The minimum call count to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
+ cl::init(1000),
+ cl::desc("The minimum count to optimize memory "
+ "intrinsic calls"));
+
+// Command line option to disable memory intrinsic optimization. The default is
+// false. This is for debugging purposes.
+static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
+ cl::Hidden, cl::desc("Disable optimize"));
+
+// The percent threshold to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("The percentage threshold for the "
+ "memory intrinsic calls optimization"));
+
+// Maximum number of versions for optimizing memory intrinsic call.
+static cl::opt<unsigned>
+ MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("The max version for the optimized memory "
+ "intrinsic calls"));
+
+// Scale the counts from the annotation using the BB count value.
+static cl::opt<bool>
+ MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
+ cl::desc("Scale the memop size counts using the basic "
+ "block count value"));
+
+// This option sets the range of precise profile memop sizes.
+extern cl::opt<std::string> MemOPSizeRange;
+
+// This option sets the value that groups large memop sizes.
+extern cl::opt<unsigned> MemOPSizeLarge;
+
+namespace {
+class PGOMemOPSizeOptLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
+ initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "PGOMemOPSize"; }
+
+private:
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char PGOMemOPSizeOptLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+ "Optimize memory intrinsic using its size value profile",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+ "Optimize memory intrinsic using its size value profile",
+ false, false)
+
+FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
+ return new PGOMemOPSizeOptLegacyPass();
+}
+
+namespace {
+class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
+public:
+ MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI)
+ : Func(Func), BFI(BFI), Changed(false) {
+ ValueDataArray =
+ llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
+ // Get the MemOPSize range information from the option MemOPSizeRange.
+ getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
+ PreciseRangeLast);
+ }
+ bool isChanged() const { return Changed; }
+ void perform() {
+ WorkList.clear();
+ visit(Func);
+
+ for (auto &MI : WorkList) {
+ ++NumOfPGOMemOPAnnotate;
+ if (perform(MI)) {
+ Changed = true;
+ ++NumOfPGOMemOPOpt;
+ DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()
+ << " is transformed.\n");
+ }
+ }
+ }
+
+ void visitMemIntrinsic(MemIntrinsic &MI) {
+ Value *Length = MI.getLength();
+ // Don't perform this on constant-length calls.
+ if (dyn_cast<ConstantInt>(Length))
+ return;
+ WorkList.push_back(&MI);
+ }
+
+private:
+ Function &Func;
+ BlockFrequencyInfo &BFI;
+ bool Changed;
+ std::vector<MemIntrinsic *> WorkList;
+ // Start of the precise range.
+ int64_t PreciseRangeStart;
+ // Last value of the precise range.
+ int64_t PreciseRangeLast;
+ // The space to read the profile annotation.
+ std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+ bool perform(MemIntrinsic *MI);
+
+ // This kind shows which group the value falls in. For PreciseValue, we have
+ // the profile count for that value. LargeGroup groups the values that are in
+ // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
+ enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
+
+ MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
+ if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
+ return LargeGroup;
+ if (Value == PreciseRangeLast + 1)
+ return NonLargeGroup;
+ return PreciseValue;
+ }
+};
+
+static const char *getMIName(const MemIntrinsic *MI) {
+ switch (MI->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ return "memcpy";
+ case Intrinsic::memmove:
+ return "memmove";
+ case Intrinsic::memset:
+ return "memset";
+ default:
+ return "unknown";
+ }
+}
+
+static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
+ assert(Count <= TotalCount);
+ if (Count < MemOPCountThreshold)
+ return false;
+ if (Count < TotalCount * MemOPPercentThreshold / 100)
+ return false;
+ return true;
+}
+
+static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
+ uint64_t Denom) {
+ if (!MemOPScaleCount)
+ return Count;
+ bool Overflowed;
+ uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
+ return ScaleCount / Denom;
+}
+
+bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
+ assert(MI);
+ if (MI->getIntrinsicID() == Intrinsic::memmove)
+ return false;
+
+ uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
+ uint64_t TotalCount;
+ if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
+ ValueDataArray.get(), NumVals, TotalCount))
+ return false;
+
+ uint64_t ActualCount = TotalCount;
+ uint64_t SavedTotalCount = TotalCount;
+ if (MemOPScaleCount) {
+ auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
+ if (!BBEdgeCount)
+ return false;
+ ActualCount = *BBEdgeCount;
+ }
+
+ ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+ DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount
+ << "\n");
+ DEBUG(
+ for (auto &VD
+ : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
+
+ if (ActualCount < MemOPCountThreshold)
+ return false;
+ // Skip if the total value profiled count is 0, in which case we can't
+ // scale up the counts properly (and there is no profitable transformation).
+ if (TotalCount == 0)
+ return false;
+
+ TotalCount = ActualCount;
+ if (MemOPScaleCount)
+ DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
+ << " denominator = " << SavedTotalCount << "\n");
+
+ // Keeping track of the count of the default case:
+ uint64_t RemainCount = TotalCount;
+ uint64_t SavedRemainCount = SavedTotalCount;
+ SmallVector<uint64_t, 16> SizeIds;
+ SmallVector<uint64_t, 16> CaseCounts;
+ uint64_t MaxCount = 0;
+ unsigned Version = 0;
+ // Default case is in the front -- save the slot here.
+ CaseCounts.push_back(0);
+ for (auto &VD : VDs) {
+ int64_t V = VD.Value;
+ uint64_t C = VD.Count;
+ if (MemOPScaleCount)
+ C = getScaledCount(C, ActualCount, SavedTotalCount);
+
+ // Only precise values are of interest here.
+ if (getMemOPSizeKind(V) != PreciseValue)
+ continue;
+
+ // ValueCounts are sorted by count. Break at the first unprofitable
+ // value.
+ if (!isProfitable(C, RemainCount))
+ break;
+
+ SizeIds.push_back(V);
+ CaseCounts.push_back(C);
+ if (C > MaxCount)
+ MaxCount = C;
+
+ assert(RemainCount >= C);
+ RemainCount -= C;
+ assert(SavedRemainCount >= VD.Count);
+ SavedRemainCount -= VD.Count;
+
+ if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
+ break;
+ }
+
+ if (Version == 0)
+ return false;
+
+ CaseCounts[0] = RemainCount;
+ if (RemainCount > MaxCount)
+ MaxCount = RemainCount;
+
+ uint64_t SumForOpt = TotalCount - RemainCount;
+
+ DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
+ << " Versions (covering " << SumForOpt << " out of "
+ << TotalCount << ")\n");
+
+ // mem_op(..., size)
+ // ==>
+ // switch (size) {
+ // case s1:
+ // mem_op(..., s1);
+ // goto merge_bb;
+ // case s2:
+ // mem_op(..., s2);
+ // goto merge_bb;
+ // ...
+ // default:
+ // mem_op(..., size);
+ // goto merge_bb;
+ // }
+ // merge_bb:
+
+ BasicBlock *BB = MI->getParent();
+ DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+ DEBUG(dbgs() << *BB << "\n");
+ auto OrigBBFreq = BFI.getBlockFreq(BB);
+
+ BasicBlock *DefaultBB = SplitBlock(BB, MI);
+ BasicBlock::iterator It(*MI);
+ ++It;
+ assert(It != DefaultBB->end());
+ BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
+ MergeBB->setName("MemOP.Merge");
+ BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+ DefaultBB->setName("MemOP.Default");
+
+ auto &Ctx = Func.getContext();
+ IRBuilder<> IRB(BB);
+ BB->getTerminator()->eraseFromParent();
+ Value *SizeVar = MI->getLength();
+ SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
+
+ // Clear the value profile data.
+ MI->setMetadata(LLVMContext::MD_prof, nullptr);
+ // If all cases were promoted, we don't need the !prof metadata.
+ if (SavedRemainCount > 0 || Version != NumVals)
+ // Otherwise, write the un-promoted records back as updated metadata.
+ annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version),
+ SavedRemainCount, IPVK_MemOPSize, NumVals);
+
+ DEBUG(dbgs() << "\n\n== Basic Block After ==\n");
+
+ for (uint64_t SizeId : SizeIds) {
+ ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);
+ BasicBlock *CaseBB = BasicBlock::Create(
+ Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
+ Instruction *NewInst = MI->clone();
+ // Fix the argument.
+ dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId);
+ CaseBB->getInstList().push_back(NewInst);
+ IRBuilder<> IRBCase(CaseBB);
+ IRBCase.CreateBr(MergeBB);
+ SI->addCase(CaseSizeId, CaseBB);
+ DEBUG(dbgs() << *CaseBB << "\n");
+ }
+ setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
+
+ DEBUG(dbgs() << *BB << "\n");
+ DEBUG(dbgs() << *DefaultBB << "\n");
+ DEBUG(dbgs() << *MergeBB << "\n");
+
+ emitOptimizationRemark(Func.getContext(), "memop-opt", Func,
+ MI->getDebugLoc(),
+ Twine("optimize ") + getMIName(MI) + " with count " +
+ Twine(SumForOpt) + " out of " + Twine(TotalCount) +
+ " for " + Twine(Version) + " versions");
+
+ return true;
+}
+} // namespace
+
+static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {
+ if (DisableMemOPOPT)
+ return false;
+
+ if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+ MemOPSizeOpt MemOPSizeOpt(F, BFI);
+ MemOPSizeOpt.perform();
+ return MemOPSizeOpt.isChanged();
+}
+
+bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ return PGOMemOPSizeOptImpl(F, BFI);
+}
+
+namespace llvm {
+char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
+
+PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ bool Changed = PGOMemOPSizeOptImpl(F, BFI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+} // namespace llvm
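At the source level, the rewrite sketched in the comment block inside perform() looks roughly like this (the hot sizes 8 and 16 are invented for illustration; the pass derives them from the size value profile):

#include <cstddef>
#include <cstring>

void beforePass(void *Dst, const void *Src, std::size_t N) {
  std::memcpy(Dst, Src, N);
}

void afterPass(void *Dst, const void *Src, std::size_t N) {
  switch (N) {
  case 8:                     // hot size: now a constant-size memcpy,
    std::memcpy(Dst, Src, 8); // later lowered to an inline sequence
    break;
  case 16:
    std::memcpy(Dst, Src, 16);
    break;
  default:                    // cold sizes keep the original call
    std::memcpy(Dst, Src, N);
    break;
  }
}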
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 8aa40d1759de..e3c36c98ab0d 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -61,7 +61,7 @@ static const char *const SanCov8bitCountersInitName =
"__sanitizer_cov_8bit_counters_init";
static const char *const SanCovGuardsSectionName = "sancov_guards";
-static const char *const SanCovCountersSectionName = "sancov_counters";
+static const char *const SanCovCountersSectionName = "sancov_cntrs";
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 7b625b9b136e..2a4c9526dfcd 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -551,7 +551,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) {
BBChanged = true;
}
}
- };
+ }
FnChanged |= BBChanged;
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c4f450949e6d..0f92760a874b 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -506,7 +507,7 @@ private:
if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
// Optimize MemoryPhi nodes that may become redundant by having all the
// same input values once MA is removed.
- SmallVector<MemoryPhi *, 4> PhisToCheck;
+ SmallSetVector<MemoryPhi *, 4> PhisToCheck;
SmallVector<MemoryAccess *, 8> WorkQueue;
WorkQueue.push_back(MA);
// Process MemoryPhi nodes in FIFO order using an ever-growing vector since
@@ -517,7 +518,7 @@ private:
for (auto *U : WI->users())
if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
- PhisToCheck.push_back(MP);
+ PhisToCheck.insert(MP);
MSSAUpdater->removeMemoryAccess(WI);
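SmallSetVector provides exactly what this loop needs: iteration in first-insertion order with duplicates silently dropped, so a MemoryPhi reachable through several users is queued for re-checking only once. A minimal sketch of that behavior in standard C++:

#include <unordered_set>
#include <vector>

template <typename T> class SetVectorSketch {
  std::vector<T> Order;       // preserves first-insertion order
  std::unordered_set<T> Seen; // rejects duplicates

public:
  bool insert(const T &V) {
    if (!Seen.insert(V).second)
      return false; // already present: keep the original position
    Order.push_back(V);
    return true;
  }
  typename std::vector<T>::const_iterator begin() const {
    return Order.begin();
  }
  typename std::vector<T>::const_iterator end() const { return Order.end(); }
};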
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 8634816e702f..5fd2dfc118b4 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -64,6 +64,17 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
+namespace llvm {
+namespace GVNExpression {
+
+LLVM_DUMP_METHOD void Expression::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
+}
+}
+
namespace {
static bool isMemoryInst(const Instruction *I) {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b706152f30c8..8b435050ac76 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -983,21 +983,21 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *NumBytesS =
SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store is atomic, then it must necessarily be unordered
// by the previous checks.
- if (!SI->isAtomic() && !LI->isAtomic()) {
- if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(
- NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW);
-
- Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
-
+ if (!SI->isAtomic() && !LI->isAtomic())
NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
- } else {
+ else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
if (Align < StoreSize)
@@ -1010,11 +1010,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
return false;
- Value *NumElements =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+ StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
- NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr,
- NumElements, StoreSize);
// Propagate alignment info onto the pointer args. Note that unordered
// atomic loads/stores are *required* by the spec to have an alignment
// but non-atomic loads/stores may not.
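Hoisting the NumBytes computation works because CreateElementUnorderedAtomicMemCpy, unlike the removed CreateElementAtomicMemCpy, takes a byte count rather than an element count, so both branches can share the same expanded SCEV. In scalar form the hoisted value is simply:

#include <cstdint>

// Sketch of the SCEV above: (backedge-taken count + 1) iterations, each
// storing StoreSize bytes.
uint64_t numCopyBytes(uint64_t BECount, uint64_t StoreSize) {
  uint64_t NumBytes = BECount + 1; // trip count
  if (StoreSize != 1)
    NumBytes *= StoreSize;
  return NumBytes;
}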
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 6926aae37963..cbbd55512c9f 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2195,7 +2195,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// For a given expression, mark the phi of ops instructions that could have
// changed as a result.
void NewGVN::markPhiOfOpsChanged(const Expression *E) {
- touchAndErase(ExpressionToPhiOfOps, E);
+ touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E));
}
// Perform congruence finding on a given value numbering expression.
@@ -3561,7 +3561,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
// the block and subtract the pred count, but it's more complicated.
if (ReachablePredCount.lookup(BB) !=
- std::distance(pred_begin(BB), pred_end(BB))) {
+ unsigned(std::distance(pred_begin(BB), pred_end(BB)))) {
for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
auto &PHI = cast<PHINode>(*II);
ReplaceUnreachablePHIArgs(PHI, BB);
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bae7911d222c..a52739bb76f7 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -89,10 +89,10 @@ struct RewriteStatepointsForGC : public ModulePass {
Changed |= runOnFunction(F);
if (Changed) {
- // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
+ // stripNonValidAttributesAndMetadata asserts that shouldRewriteStatepointsIn
// returns true for at least one function in the module. Since at least
// one function changed, we know that the precondition is satisfied.
- stripNonValidAttributes(M);
+ stripNonValidAttributesAndMetadata(M);
}
return Changed;
@@ -105,20 +105,24 @@ struct RewriteStatepointsForGC : public ModulePass {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
- /// The IR fed into RewriteStatepointsForGC may have had attributes implying
- /// dereferenceability that are no longer valid/correct after
- /// RewriteStatepointsForGC has run. This is because semantically, after
+ /// The IR fed into RewriteStatepointsForGC may have had attributes and
+ /// metadata implying dereferenceability that are no longer valid/correct after
+ /// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
- /// heap. stripNonValidAttributes (conservatively) restores correctness
- /// by erasing all attributes in the module that externally imply
- /// dereferenceability.
- /// Similar reasoning also applies to the noalias attributes. gc.statepoint
- /// can touch the entire heap including noalias objects.
- void stripNonValidAttributes(Module &M);
-
- // Helpers for stripNonValidAttributes
- void stripNonValidAttributesFromBody(Function &F);
+ /// heap. stripNonValidAttributesAndMetadata (conservatively) restores
+ /// correctness by erasing all attributes in the module that externally imply
+ /// dereferenceability. Similar reasoning also applies to the noalias
+ /// attributes and metadata. gc.statepoint can touch the entire heap including
+ /// noalias objects.
+ void stripNonValidAttributesAndMetadata(Module &M);
+
+ // Helpers for stripNonValidAttributesAndMetadata
+ void stripNonValidAttributesAndMetadataFromBody(Function &F);
void stripNonValidAttributesFromPrototype(Function &F);
+ // Certain metadata on instructions are invalid after running RS4GC.
+ // Optimizations that run after RS4GC can incorrectly use this metadata to
+ // optimize functions. We drop such metadata on the instruction.
+ void stripInvalidMetadataFromInstruction(Instruction &I);
};
} // namespace
@@ -2306,13 +2310,44 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
}
-void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
+void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) {
+
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ return;
+ // These are the metadata kinds that are still valid on loads and stores
+ // after RS4GC.
+ // Metadata implying dereferenceability and noalias is (conservatively)
+ // dropped. This is because semantically, after RewriteStatepointsForGC
+ // runs, all calls to gc.statepoint "free" the entire heap. Also,
+ // gc.statepoint can touch the entire heap including noalias objects. The
+ // reasoning is the same as for stripping the analogous dereferenceability
+ // and noalias attributes.
+ // We also drop the invariant.load metadata on the load because that
+ // metadata implies the address operand of the load points to memory that
+ // is never changed once it becomes dereferenceable. This is no longer true
+ // after RS4GC. Similar reasoning applies to invariant.group metadata,
+ // which applies to loads within a group.
+ unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_nontemporal,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_align,
+ LLVMContext::MD_type};
+
+ // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
+ I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
+
+}
+
+void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) {
if (F.empty())
return;
LLVMContext &Ctx = F.getContext();
MDBuilder Builder(Ctx);
+
for (Instruction &I : instructions(F)) {
if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
@@ -2333,6 +2368,8 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
}
+ stripInvalidMetadataFromInstruction(I);
+
if (CallSite CS = CallSite(&I)) {
for (int i = 0, e = CS.arg_size(); i != e; i++)
if (isa<PointerType>(CS.getArgument(i)->getType()))
@@ -2357,7 +2394,7 @@ static bool shouldRewriteStatepointsIn(Function &F) {
return false;
}
-void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
+void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) {
#ifndef NDEBUG
assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif
@@ -2366,7 +2403,7 @@ void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
stripNonValidAttributesFromPrototype(F);
for (Function &F : M)
- stripNonValidAttributesFromBody(F);
+ stripNonValidAttributesAndMetadataFromBody(F);
}
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
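dropUnknownNonDebugMetadata implements a whitelist: every metadata kind not named in ValidMetadataAfterRS4GC is detached from the instruction. A rough standalone model of that filtering, with kind IDs reduced to plain unsigned values for illustration:

#include <algorithm>
#include <vector>

void dropUnknownMetadataSketch(std::vector<unsigned> &AttachedKinds,
                               const std::vector<unsigned> &KnownIDs) {
  // Erase every attached kind that is not in the known-valid list.
  AttachedKinds.erase(
      std::remove_if(AttachedKinds.begin(), AttachedKinds.end(),
                     [&](unsigned Kind) {
                       return std::find(KnownIDs.begin(), KnownIDs.end(),
                                        Kind) == KnownIDs.end();
                     }),
      AttachedKinds.end());
}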
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 815492ac354c..c6929c33b3e9 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -515,10 +515,6 @@ private:
void visitCmpInst(CmpInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
- void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); }
- void visitFuncletPadInst(FuncletPadInst &FPI) {
- markOverdefined(&FPI);
- }
void visitCatchSwitchInst(CatchSwitchInst &CPI) {
markOverdefined(&CPI);
visitTerminatorInst(CPI);
@@ -539,13 +535,6 @@ private:
void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
- void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
- markOverdefined(&I);
- }
- void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
- void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVAArgInst (Instruction &I) { markOverdefined(&I); }
-
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle.
DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 24d28a6c2831..5d57ed9718fb 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -142,8 +142,139 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
return false;
}
-void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
+static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
+ BasicBlock *CommonExitBlock = nullptr;
+ auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
+ for (auto *Succ : successors(Block)) {
+ // Internal edges, ok.
+ if (Blocks.count(Succ))
+ continue;
+ if (!CommonExitBlock) {
+ CommonExitBlock = Succ;
+ continue;
+ }
+ if (CommonExitBlock == Succ)
+ continue;
+
+ return true;
+ }
+ return false;
+ };
+
+ if (any_of(Blocks, hasNonCommonExitSucc))
+ return nullptr;
+
+ return CommonExitBlock;
+}
+
+bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
+ Instruction *Addr) const {
+ AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ for (Instruction &II : BB) {
+
+ if (isa<DbgInfoIntrinsic>(II))
+ continue;
+
+ unsigned Opcode = II.getOpcode();
+ Value *MemAddr = nullptr;
+ switch (Opcode) {
+ case Instruction::Store:
+ case Instruction::Load: {
+ if (Opcode == Instruction::Store) {
+ StoreInst *SI = cast<StoreInst>(&II);
+ MemAddr = SI->getPointerOperand();
+ } else {
+ LoadInst *LI = cast<LoadInst>(&II);
+ MemAddr = LI->getPointerOperand();
+ }
+ // Global variables cannot alias with locals.
+ if (dyn_cast<Constant>(MemAddr))
+ break;
+ Value *Base = MemAddr->stripInBoundsConstantOffsets();
+ if (!dyn_cast<AllocaInst>(Base) || Base == AI)
+ return false;
+ break;
+ }
+ default: {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ break;
+ return false;
+ }
+ // Treat all other cases conservatively: reject if the instruction
+ // may have side effects.
+ if (II.mayHaveSideEffects())
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+BasicBlock *
+CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
+ BasicBlock *SinglePredFromOutlineRegion = nullptr;
+ assert(!Blocks.count(CommonExitBlock) &&
+ "Expect a block outside the region!");
+ for (auto *Pred : predecessors(CommonExitBlock)) {
+ if (!Blocks.count(Pred))
+ continue;
+ if (!SinglePredFromOutlineRegion) {
+ SinglePredFromOutlineRegion = Pred;
+ } else if (SinglePredFromOutlineRegion != Pred) {
+ SinglePredFromOutlineRegion = nullptr;
+ break;
+ }
+ }
+
+ if (SinglePredFromOutlineRegion)
+ return SinglePredFromOutlineRegion;
+
+#ifndef NDEBUG
+ auto getFirstPHI = [](BasicBlock *BB) {
+ BasicBlock::iterator I = BB->begin();
+ PHINode *FirstPhi = nullptr;
+ while (I != BB->end()) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ break;
+ if (!FirstPhi) {
+ FirstPhi = Phi;
+ break;
+ }
+ }
+ return FirstPhi;
+ };
+ // If there are any phi nodes, the single pred either exists or has
+ // already been created before code extraction.
+ assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
+#endif
+
+ BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
+ CommonExitBlock->getFirstNonPHI()->getIterator());
+
+ for (auto *Pred : predecessors(CommonExitBlock)) {
+ if (Blocks.count(Pred))
+ continue;
+ Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
+ }
+ // Now add the old exit block to the outline region.
+ Blocks.insert(CommonExitBlock);
+ return CommonExitBlock;
+}
+
+void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+ BasicBlock *&ExitBlock) const {
Function *Func = (*Blocks.begin())->getParent();
+ ExitBlock = getCommonExitBlock(Blocks);
+
for (BasicBlock &BB : *Func) {
if (Blocks.count(&BB))
continue;
@@ -152,49 +283,96 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
if (!AI)
continue;
- // Returns true if matching life time markers are found within
- // the outlined region.
- auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+ // Find the pair of lifetime markers for address 'Addr' that are either
+ // defined inside the outline region or can legally be shrinkwrapped into
+ // the outline region. If there are no other untracked uses of the
+ // address, return the pair of markers if found; otherwise return a pair
+ // of nullptrs.
+ auto GetLifeTimeMarkers =
+ [&](Instruction *Addr, bool &SinkLifeStart,
+ bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
- for (User *U : Addr->users()) {
- if (!definedInRegion(Blocks, U))
- return false;
+ for (User *U : Addr->users()) {
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // Do not handle the case where AI has multiple start markers.
+ if (LifeStart)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
LifeStart = IntrInst;
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ }
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (LifeEnd)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
LifeEnd = IntrInst;
+ }
+ continue;
}
+ // Any untracked use of the address means we must bail.
+ if (!definedInRegion(Blocks, U))
+ return std::make_pair<Instruction *>(nullptr, nullptr);
}
- return LifeStart && LifeEnd;
+
+ if (!LifeStart || !LifeEnd)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+
+ SinkLifeStart = !definedInRegion(Blocks, LifeStart);
+ HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
+ // Do the legality check.
+ if ((SinkLifeStart || HoistLifeEnd) &&
+ !isLegalToShrinkwrapLifetimeMarkers(Addr))
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+
+ // Check to see if we have a place to do hoisting; if not, bail.
+ if (HoistLifeEnd && !ExitBlock)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+
+ return std::make_pair(LifeStart, LifeEnd);
};
- if (GetLifeTimeMarkers(AI)) {
+ bool SinkLifeStart = false, HoistLifeEnd = false;
+ auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
+
+ if (Markers.first) {
+ if (SinkLifeStart)
+ SinkCands.insert(Markers.first);
SinkCands.insert(AI);
+ if (HoistLifeEnd)
+ HoistCands.insert(Markers.second);
continue;
}
- // Follow the bitcast:
+ // Follow the bitcast.
Instruction *MarkerAddr = nullptr;
for (User *U : AI->users()) {
- if (U->stripPointerCasts() == AI) {
+
+ if (U->stripInBoundsConstantOffsets() == AI) {
+ SinkLifeStart = false;
+ HoistLifeEnd = false;
Instruction *Bitcast = cast<Instruction>(U);
- if (GetLifeTimeMarkers(Bitcast)) {
+ Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
+ if (Markers.first) {
MarkerAddr = Bitcast;
continue;
}
}
+
+ // Found an unknown use of AI.
if (!definedInRegion(Blocks, U)) {
MarkerAddr = nullptr;
break;
}
}
+
if (MarkerAddr) {
+ if (SinkLifeStart)
+ SinkCands.insert(Markers.first);
if (!definedInRegion(Blocks, MarkerAddr))
SinkCands.insert(MarkerAddr);
SinkCands.insert(AI);
+ if (HoistLifeEnd)
+ HoistCands.insert(Markers.second);
}
}
}
@@ -780,7 +958,8 @@ Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
- ValueSet inputs, outputs, SinkingCands;
+ ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
// Assumption: this is a single-entry code region, and the header is the first
// block in the region.
@@ -819,7 +998,8 @@ Function *CodeExtractor::extractCodeRegion() {
"newFuncRoot");
newFuncRoot->getInstList().push_back(BranchInst::Create(header));
- findAllocas(SinkingCands);
+ findAllocas(SinkingCands, HoistingCands, CommonExit);
+ assert(HoistingCands.empty() || CommonExit);
// Find inputs to, outputs from the code region.
findInputsOutputs(inputs, outputs, SinkingCands);
@@ -829,6 +1009,13 @@ Function *CodeExtractor::extractCodeRegion() {
cast<Instruction>(II)->moveBefore(*newFuncRoot,
newFuncRoot->getFirstInsertionPt());
+ if (!HoistingCands.empty()) {
+ auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
+ Instruction *TI = HoistToBlock->getTerminator();
+ for (auto *II : HoistingCands)
+ cast<Instruction>(II)->moveBefore(TI);
+ }
+
// Calculate the exit blocks for the extracted region and the total exit
// weights for each of those blocks.
DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
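getCommonExitBlock, added near the top of this file's diff, is a single pass over the edges leaving the region. A standalone restatement over a toy CFG type (a sketch, not the LLVM API):

#include <set>
#include <vector>

struct Block { std::vector<Block *> Succs; };

Block *commonExit(const std::set<Block *> &Region) {
  Block *Exit = nullptr;
  for (Block *B : Region)
    for (Block *S : B->Succs) {
      if (Region.count(S))
        continue;       // internal edge: irrelevant
      if (Exit && Exit != S)
        return nullptr; // a second distinct exit: no hoisting target
      Exit = S;
    }
  return Exit;
}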
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 9e71cba4f1b7..1260e35e934d 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -460,6 +460,9 @@ void PredicateInfo::buildPredicateInfo() {
if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
if (!BI->isConditional())
continue;
+ // Can't insert conditional information if both successors are the same
+ // block.
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ continue;
processBranch(BI, BranchBB, OpsToRename);
} else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
processSwitch(SI, BranchBB, OpsToRename);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 02a5d3dbeadf..faa14046b1e3 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -352,7 +352,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
return false;
typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
- const SCEV *, const SCEV *, SCEV::NoWrapFlags);
+ const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
const SCEV *, Type *);
@@ -406,10 +406,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
const SCEV *A =
- (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy);
+ (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0u),
+ WideTy);
const SCEV *B =
(SE->*Operation)((SE->*Extension)(LHS, WideTy),
- (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap);
+ (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap, 0u);
if (A != B)
return false;
@@ -530,8 +531,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
return false;
const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
- SCEV::NoWrapFlags);
-
+ SCEV::NoWrapFlags, unsigned);
switch (BO->getOpcode()) {
default:
return false;
@@ -560,7 +560,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
- SCEV::FlagAnyWrap);
+ SCEV::FlagAnyWrap, 0u);
if (ExtendAfterOp == OpAfterExtend) {
BO->setHasNoUnsignedWrap();
SE->forgetValue(BO);
@@ -572,7 +572,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
- SCEV::FlagAnyWrap);
+ SCEV::FlagAnyWrap, 0u);
if (ExtendAfterOp == OpAfterExtend) {
BO->setHasNoSignedWrap();
SE->forgetValue(BO);
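The mechanical reason for the new trailing 0u arguments: ScalarEvolution's arithmetic methods gained an unsigned Depth parameter, and default arguments are not part of a function's type, so the pointer-to-member typedefs must spell the parameter out and every indirect call must pass it explicitly. A self-contained illustration (names are invented for the sketch):

struct ScalarEvolutionLike {
  int getAddExprLike(int LHS, int RHS, unsigned Depth = 0) {
    return LHS + RHS + static_cast<int>(Depth);
  }
};

// The default argument does not appear in the member-pointer type.
typedef int (ScalarEvolutionLike::*OperationFn)(int, int, unsigned);

int callThrough(ScalarEvolutionLike &SE, OperationFn Op, int L, int R) {
  return (SE.*Op)(L, R, 0u); // Depth must be supplied through the pointer
}
// Usage: callThrough(SE, &ScalarEvolutionLike::getAddExprLike, 1, 2);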