Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 120
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 5
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 16
-rw-r--r--  lib/Bitcode/Reader/BitstreamReader.cpp | 22
-rw-r--r--  lib/Bitcode/Reader/MetadataLoader.cpp | 400
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 191
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 44
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 13
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 42
-rw-r--r--  lib/CodeGen/MIRPrinter.cpp | 8
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 9
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 18
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 17
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDie.cpp | 5
-rw-r--r--  lib/Fuzzer/FuzzerDriver.cpp | 1
-rw-r--r--  lib/Fuzzer/FuzzerFlags.def | 1
-rw-r--r--  lib/Fuzzer/FuzzerIO.h | 3
-rw-r--r--  lib/Fuzzer/FuzzerIOPosix.cpp | 6
-rw-r--r--  lib/Fuzzer/FuzzerIOWindows.cpp | 2
-rw-r--r--  lib/Fuzzer/FuzzerInternal.h | 1
-rw-r--r--  lib/Fuzzer/FuzzerLoop.cpp | 5
-rw-r--r--  lib/Fuzzer/FuzzerMerge.cpp | 12
-rw-r--r--  lib/Fuzzer/FuzzerOptions.h | 1
-rw-r--r--  lib/Fuzzer/FuzzerTraceState.cpp | 47
-rw-r--r--  lib/Fuzzer/FuzzerUtilPosix.cpp | 6
-rw-r--r--  lib/Fuzzer/FuzzerUtilWindows.cpp | 1
-rw-r--r--  lib/Fuzzer/test/merge.test | 8
-rw-r--r--  lib/LTO/LTO.cpp | 41
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp | 42
-rw-r--r--  lib/Support/APInt.cpp | 2
-rw-r--r--  lib/Support/CMakeLists.txt | 1
-rw-r--r--  lib/Support/Host.cpp | 20
-rw-r--r--  lib/Support/TarWriter.cpp | 166
-rw-r--r--  lib/Support/Unix/Signals.inc | 2
-rw-r--r--  lib/Target/AArch64/AArch64CollectLOH.cpp | 1123
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 110
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 190
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 4
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp | 10
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.h | 8
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.h | 48
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 38
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.h | 2
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 89
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.h | 9
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 13
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 27
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.h | 3
-rw-r--r--  lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 48
-rw-r--r--  lib/Target/Lanai/Disassembler/LanaiDisassembler.h | 7
-rw-r--r--  lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h | 13
-rw-r--r--  lib/Target/Lanai/LanaiISelLowering.cpp | 42
-rw-r--r--  lib/Target/Lanai/LanaiRegisterInfo.h | 9
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp | 12
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp | 29
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp | 15
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 3
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 8
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 172
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 630
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.h | 3
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp | 182
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp | 316
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 14
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 96
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 14
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 37
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopSink.cpp | 3
-rw-r--r--  lib/Transforms/Utils/FunctionImportUtils.cpp | 24
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 59
84 files changed, 2741 insertions, 2023 deletions
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 1d2ffc1abe1f..6387bb36166e 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -80,10 +80,15 @@ static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
return CalleeInfo::HotnessType::None;
}
-static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
- const Function &F, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI,
- bool HasLocalsInUsed) {
+static bool isNonRenamableLocal(const GlobalValue &GV) {
+ return GV.hasSection() && GV.hasLocalLinkage();
+}
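// Editor's sketch (not part of this patch): a local value pinned to a
// section matches isNonRenamableLocal() above; promotion would have to
// rename it, and renaming a value placed in a special section can break
// tools that key on the original symbol name. Hypothetical example:
//   static int Counter __attribute__((section("__mysec")));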
+
+static void
+computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool HasLocalsInUsed,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
// Summary not currently supported for anonymous functions; they should
// have been named.
assert(F.hasName());
@@ -178,37 +183,64 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
- GlobalValueSummary::GVFlags Flags(F);
+ bool NonRenamableLocal = isNonRenamableLocal(F);
+ bool NotEligibleForImport =
+ NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
+ // Inliner doesn't handle variadic functions.
+ // FIXME: refactor this to use the same code that the inliner is using.
+ F.isVarArg();
+ GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
+ /* LiveRoot = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
TypeTests.takeVector());
- if (HasInlineAsmMaybeReferencingInternal)
- FuncSummary->setHasInlineAsmMaybeReferencingInternal();
+ if (NonRenamableLocal)
+ CantBePromoted.insert(F.getGUID());
Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
-static void computeVariableSummary(ModuleSummaryIndex &Index,
- const GlobalVariable &V) {
+static void
+computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
findRefEdges(&V, RefEdges, Visited);
- GlobalValueSummary::GVFlags Flags(V);
+ bool NonRenamableLocal = isNonRenamableLocal(V);
+ GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
+ /* LiveRoot = */ false);
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+ if (NonRenamableLocal)
+ CantBePromoted.insert(V.getGUID());
Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
}
-static void computeAliasSummary(ModuleSummaryIndex &Index,
- const GlobalAlias &A) {
- GlobalValueSummary::GVFlags Flags(A);
+static void
+computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
+ bool NonRenamableLocal = isNonRenamableLocal(A);
+ GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
+ /* LiveRoot = */ false);
auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
AS->setAliasee(AliaseeSummary);
+ if (NonRenamableLocal)
+ CantBePromoted.insert(A.getGUID());
Index.addGlobalValueSummary(A.getName(), std::move(AS));
}
+// Set LiveRoot flag on entries matching the given value name.
+static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
+ auto SummaryList =
+ Index.findGlobalValueSummaryList(GlobalValue::getGUID(Name));
+ if (SummaryList == Index.end())
+ return;
+ for (auto &Summary : SummaryList->second)
+ Summary->setLiveRoot();
+}
+
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
const Module &M,
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
@@ -226,9 +258,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
// Next collect those in the llvm.compiler.used set.
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
+ DenseSet<GlobalValue::GUID> CantBePromoted;
for (auto *V : Used) {
- if (V->hasLocalLinkage())
+ if (V->hasLocalLinkage()) {
LocalsUsed.insert(V);
+ CantBePromoted.insert(V->getGUID());
+ }
}
// Compute summaries for all functions defined in module, and save in the
@@ -248,7 +283,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
BFI = BFIPtr.get();
}
- computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty());
+ computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(),
+ CantBePromoted);
}
// Compute summaries for all variables defined in module, and save in the
@@ -256,20 +292,29 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(Index, G);
+ computeVariableSummary(Index, G, CantBePromoted);
}
// Compute summaries for all aliases defined in module, and save in the
// index.
for (const GlobalAlias &A : M.aliases())
- computeAliasSummary(Index, A);
+ computeAliasSummary(Index, A, CantBePromoted);
for (auto *V : LocalsUsed) {
auto *Summary = Index.getGlobalValueSummary(*V);
assert(Summary && "Missing summary for global value");
- Summary->setNoRename();
+ Summary->setNotEligibleToImport();
}
+ // The linker doesn't know about these LLVM-produced values, so we need
+ // to flag them as live in the index to ensure index-based dead value
+ // analysis treats them as live roots of the analysis.
+ setLiveRoot(Index, "llvm.used");
+ setLiveRoot(Index, "llvm.compiler.used");
+ setLiveRoot(Index, "llvm.global_ctors");
+ setLiveRoot(Index, "llvm.global_dtors");
+ setLiveRoot(Index, "llvm.global.annotations");
+
if (!M.getModuleInlineAsm().empty()) {
// Collect the local values defined by module level asm, and set up
// summaries for these symbols so that they can be marked as NoRename,
@@ -282,7 +327,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// referenced from there.
ModuleSymbolTable::CollectAsmSymbols(
Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
- [&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ [&M, &Index, &CantBePromoted](StringRef Name,
+ object::BasicSymbolRef::Flags Flags) {
// Symbols not marked as Weak or Global are local definitions.
if (Flags & (object::BasicSymbolRef::SF_Weak |
object::BasicSymbolRef::SF_Global))
@@ -291,11 +337,10 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (!GV)
return;
assert(GV->isDeclaration() && "Def in module asm already has definition");
- GlobalValueSummary::GVFlags GVFlags(
- GlobalValue::InternalLinkage,
- /* NoRename */ true,
- /* HasInlineAsmMaybeReferencingInternal */ false,
- /* IsNotViableToInline */ true);
+ GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
+ /* NotEligibleToImport */ true,
+ /* LiveRoot */ true);
+ CantBePromoted.insert(GlobalValue::getGUID(Name));
// Create the appropriate summary type.
if (isa<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
@@ -303,18 +348,41 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GVFlags, 0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{});
- Summary->setNoRename();
Index.addGlobalValueSummary(Name, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
llvm::make_unique<GlobalVarSummary>(GVFlags,
ArrayRef<ValueInfo>{});
- Summary->setNoRename();
Index.addGlobalValueSummary(Name, std::move(Summary));
}
});
}
+ for (auto &GlobalList : Index) {
+ assert(GlobalList.second.size() == 1 &&
+ "Expected module's index to have one summary per GUID");
+ auto &Summary = GlobalList.second[0];
+ bool AllRefsCanBeExternallyReferenced =
+ llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
+ return !CantBePromoted.count(VI.getValue()->getGUID());
+ });
+ if (!AllRefsCanBeExternallyReferenced) {
+ Summary->setNotEligibleToImport();
+ continue;
+ }
+
+ if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
+ bool AllCallsCanBeExternallyReferenced = llvm::all_of(
+ FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
+ auto GUID = Edge.first.isGUID() ? Edge.first.getGUID()
+ : Edge.first.getValue()->getGUID();
+ return !CantBePromoted.count(GUID);
+ });
+ if (!AllCallsCanBeExternallyReferenced)
+ Summary->setNotEligibleToImport();
+ }
+ }
+
return Index;
}
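// Editor's note on the loop above (hypothetical module): if a function F
// references a section-pinned local such as
//   static int X __attribute__((section("s")));
// then X's GUID is in CantBePromoted, AllRefsCanBeExternallyReferenced is
// false for F, and F's summary is marked NotEligibleToImport: importing F
// into another module would require promoting X, which its section forbids.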
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 2a15b9b264e3..cd8c24630df1 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -389,8 +389,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
}
int TargetTransformInfo::getAddressComputationCost(Type *Tp,
- bool IsComplex) const {
- int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+ ScalarEvolution *SE,
+ const SCEV *Ptr) const {
+ int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 03aefcf57118..d9e249aad21d 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -801,12 +801,12 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
// to getDecodedLinkage() will need to be taken into account here as above.
auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits
RawFlags = RawFlags >> 4;
- bool NoRename = RawFlags & 0x1;
- bool IsNotViableToInline = RawFlags & 0x2;
- bool HasInlineAsmMaybeReferencingInternal = RawFlags & 0x4;
- return GlobalValueSummary::GVFlags(Linkage, NoRename,
- HasInlineAsmMaybeReferencingInternal,
- IsNotViableToInline);
+ bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3;
+ // The LiveRoot flag wasn't introduced until version 3. For dead stripping
+ // to work correctly on earlier versions, we must conservatively treat all
+ // values as live.
+ bool LiveRoot = (RawFlags & 0x2) || Version < 3;
+ return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot);
}
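// Editor's sketch (not part of this patch): together with
// getEncodedGVSummaryFlags() in BitcodeWriter.cpp, the version-3 layout is
// the encoded linkage in the low 4 bits with the two booleans above it:
//   RawFlags = (LiveRoot << 5) | (NotEligibleToImport << 4) | EncodedLinkage;
// e.g. a hypothetical EncodedLinkage of 0x7 with both flags set round-trips
// as RawFlags = 0x37.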
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
@@ -4838,9 +4838,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (!IsOldProfileFormat && Version != 2)
+ if (Version < 1 || Version > 3)
return error("Invalid summary version " + Twine(Version) +
- ", 1 or 2 expected");
+ ", 1, 2 or 3 expected");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 43c9aebd79ef..771cf3d927bc 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -93,20 +93,29 @@ static void skipAbbreviatedField(BitstreamCursor &Cursor,
}
/// skipRecord - Read the current record and discard it.
-void BitstreamCursor::skipRecord(unsigned AbbrevID) {
+unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip unabbreviated records by reading past their entries.
if (AbbrevID == bitc::UNABBREV_RECORD) {
unsigned Code = ReadVBR(6);
- (void)Code;
unsigned NumElts = ReadVBR(6);
for (unsigned i = 0; i != NumElts; ++i)
(void)ReadVBR64(6);
- return;
+ return Code;
}
const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+ const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
+ unsigned Code;
+ if (CodeOp.isLiteral())
+ Code = CodeOp.getLiteralValue();
+ else {
+ if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
+ CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
+ report_fatal_error("Abbreviation starts with an Array or a Blob");
+ Code = readAbbreviatedField(*this, CodeOp);
+ }
- for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
if (Op.isLiteral())
continue;
@@ -164,6 +173,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip over the blob.
JumpToBit(NewEnd);
}
+ return Code;
}
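// Editor's sketch of the new skipRecord() contract (mirroring its use in
// MetadataLoader::lazyLoadModuleMetadataBlock below): a caller can now peek
// a record's code cheaply and rewind only for records it cares about.
//   uint64_t Pos = Cursor.GetCurrentBitNo();
//   unsigned Code = Cursor.skipRecord(Entry.ID); // skips, but reports code
//   if (Code == bitc::METADATA_STRINGS) {
//     Cursor.JumpToBit(Pos);                     // rewind and parse fully
//     Cursor.readRecord(Entry.ID, Record, &Blob);
//   }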
unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
@@ -273,7 +283,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
}
void BitstreamCursor::ReadAbbrevRecord() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
unsigned NumOpInfo = ReadVBR(5);
for (unsigned i = 0; i != NumOpInfo; ++i) {
bool IsLiteral = Read(1);
@@ -307,7 +317,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
if (Abbv->getNumOperandInfos() == 0)
report_fatal_error("Abbrev record with no operands");
- CurAbbrevs.push_back(Abbv);
+ CurAbbrevs.push_back(std::move(Abbv));
}
Optional<BitstreamBlockInfo>
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 5da421a79b7b..460d39cc28d8 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -14,10 +14,12 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@@ -86,12 +88,23 @@
using namespace llvm;
+#define DEBUG_TYPE "bitcode-reader"
+
+STATISTIC(NumMDStringLoaded, "Number of MDStrings loaded");
+STATISTIC(NumMDNodeTemporary, "Number of MDNode::Temporary created");
+STATISTIC(NumMDRecordLoaded, "Number of Metadata records loaded");
+
/// Flag whether we need to import full type definitions for ThinLTO.
/// Currently needed for Darwin and LLDB.
static cl::opt<bool> ImportFullTypeDefinitions(
"import-full-type-definitions", cl::init(false), cl::Hidden,
cl::desc("Import full type definitions for ThinLTO."));
+static cl::opt<bool> DisableLazyLoading(
+ "disable-ondemand-mds-loading", cl::init(false), cl::Hidden,
+ cl::desc("Force disable the lazy-loading on-demand of metadata when "
+ "loading bitcode for importing."));
+
namespace {
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
@@ -165,6 +178,10 @@ public:
void assignValue(Metadata *MD, unsigned Idx);
void tryToResolveCycles();
bool hasFwdRefs() const { return !ForwardReference.empty(); }
+ int getNextFwdRef() {
+ assert(hasFwdRefs());
+ return *ForwardReference.begin();
+ }
/// Upgrade a type that had an MDString reference.
void addTypeRef(MDString &UUID, DICompositeType &CT);
@@ -215,6 +232,7 @@ Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
ForwardReference.insert(Idx);
// Create and return a placeholder, which will later be RAUW'd.
+ ++NumMDNodeTemporary;
Metadata *MD = MDNode::getTemporary(Context, None).release();
MetadataPtrs[Idx].reset(MD);
return MD;
@@ -340,8 +358,26 @@ class PlaceholderQueue {
std::deque<DistinctMDOperandPlaceholder> PHs;
public:
+ bool empty() { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
void flush(BitcodeReaderMetadataList &MetadataList);
+
+ /// Return the list of temporary nodes in the queue; these need to be
+ /// loaded before we can flush the queue.
+ void getTemporaries(BitcodeReaderMetadataList &MetadataList,
+ DenseSet<unsigned> &Temporaries) {
+ for (auto &PH : PHs) {
+ auto ID = PH.getID();
+ auto *MD = MetadataList.lookup(ID);
+ if (!MD) {
+ Temporaries.insert(ID);
+ continue;
+ }
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ if (N && N->isTemporary())
+ Temporaries.insert(ID);
+ }
+ }
};
} // end anonymous namespace
@@ -375,6 +411,30 @@ class MetadataLoader::MetadataLoaderImpl {
Module &TheModule;
std::function<Type *(unsigned)> getTypeByID;
+ /// Cursor associated with the lazy-loading of Metadata. This is the easy way
+ /// to keep around the right "context" (Abbrev list) to be able to jump into
+ /// the middle of the metadata block and load any record.
+ BitstreamCursor IndexCursor;
+
+ /// Index that keeps track of MDString values.
+ std::vector<StringRef> MDStringRef;
+
+ /// On-demand loading of a single MDString. Requires the index above to be
+ /// populated.
+ MDString *lazyLoadOneMDString(unsigned Idx);
+
+ /// Index that keeps track of where to find a metadata record in the stream.
+ std::vector<uint64_t> GlobalMetadataBitPosIndex;
+
+ /// Populate the index above to enable lazily loading of metadata, and load
+ /// the named metadata as well as the transitively referenced global
+ /// Metadata.
+ Expected<bool> lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders);
+
+ /// On-demand loading of a single metadata record. Requires the index above
+ /// populated.
+ void lazyLoadOneMetadata(unsigned Idx, PlaceholderQueue &Placeholders);
+
// Keep a mapping of seen pairs of old-style CU <-> SP, and update pointers to
// point from SP to CU after a block is completely parsed.
std::vector<std::pair<DICompileUnit *, Metadata *>> CUSubprograms;
@@ -394,13 +454,25 @@ class MetadataLoader::MetadataLoaderImpl {
Error parseOneMetadata(SmallVectorImpl<uint64_t> &Record, unsigned Code,
PlaceholderQueue &Placeholders, StringRef Blob,
- bool ModuleLevel, unsigned &NextMetadataNo);
+ unsigned &NextMetadataNo);
Error parseMetadataStrings(ArrayRef<uint64_t> Record, StringRef Blob,
- unsigned &NextMetadataNo);
+ std::function<void(StringRef)> CallBack);
Error parseGlobalObjectAttachment(GlobalObject &GO,
ArrayRef<uint64_t> Record);
Error parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
+ void resolveForwardRefsAndPlaceholders(PlaceholderQueue &Placeholders);
+
+ /// Upgrade old-style CU <-> SP pointers to point from SP to CU.
+ void upgradeCUSubprograms() {
+ for (auto CU_SP : CUSubprograms)
+ if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
+ for (auto &Op : SPs->operands())
+ if (auto *SP = dyn_cast_or_null<MDNode>(Op))
+ SP->replaceOperandWith(7, CU_SP.first);
+ CUSubprograms.clear();
+ }
+
public:
MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule,
BitcodeReaderValueList &ValueList,
@@ -444,20 +516,217 @@ Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}
+Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock(
+ PlaceholderQueue &Placeholders) {
+ IndexCursor = Stream;
+ SmallVector<uint64_t, 64> Record;
+ // Get the abbrevs, and preload record positions to make them lazy-loadable.
+ while (true) {
+ BitstreamEntry Entry = IndexCursor.advanceSkippingSubblocks(
+ BitstreamCursor::AF_DontPopBlockAtEnd);
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock: {
+ return true;
+ }
+ case BitstreamEntry::Record: {
+ // The interesting case.
+ ++NumMDRecordLoaded;
+ uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
+ auto Code = IndexCursor.skipRecord(Entry.ID);
+ switch (Code) {
+ case bitc::METADATA_STRINGS: {
+ // Rewind and parse the strings.
+ IndexCursor.JumpToBit(CurrentPos);
+ StringRef Blob;
+ Record.clear();
+ IndexCursor.readRecord(Entry.ID, Record, &Blob);
+ unsigned NumStrings = Record[0];
+ MDStringRef.reserve(NumStrings);
+ auto IndexNextMDString = [&](StringRef Str) {
+ MDStringRef.push_back(Str);
+ };
+ if (auto Err = parseMetadataStrings(Record, Blob, IndexNextMDString))
+ return std::move(Err);
+ break;
+ }
+ case bitc::METADATA_INDEX_OFFSET: {
+ // This is the offset to the index; when we see this, we skip all the
+ // records and load only an index to them.
+ IndexCursor.JumpToBit(CurrentPos);
+ Record.clear();
+ IndexCursor.readRecord(Entry.ID, Record);
+ if (Record.size() != 2)
+ return error("Invalid record");
+ auto Offset = Record[0] + (Record[1] << 32);
+ auto BeginPos = IndexCursor.GetCurrentBitNo();
+ IndexCursor.JumpToBit(BeginPos + Offset);
+ Entry = IndexCursor.advanceSkippingSubblocks(
+ BitstreamCursor::AF_DontPopBlockAtEnd);
+ assert(Entry.Kind == BitstreamEntry::Record &&
+ "Corrupted bitcode: Expected `Record` when trying to find the "
+ "Metadata index");
+ Record.clear();
+ auto Code = IndexCursor.readRecord(Entry.ID, Record);
+ (void)Code;
+ assert(Code == bitc::METADATA_INDEX && "Corrupted bitcode: Expected "
+ "`METADATA_INDEX` when trying "
+ "to find the Metadata index");
+
+ // Delta unpack
+ auto CurrentValue = BeginPos;
+ GlobalMetadataBitPosIndex.reserve(Record.size());
+ for (auto &Elt : Record) {
+ CurrentValue += Elt;
+ GlobalMetadataBitPosIndex.push_back(CurrentValue);
+ }
+ break;
+ }
+ case bitc::METADATA_INDEX:
+ // We don't expect to get here; the index is loaded when we encounter
+ // the offset.
+ return error("Corrupted Metadata block");
+ case bitc::METADATA_NAME: {
+ // Named metadata need to be materialized now and aren't deferred.
+ IndexCursor.JumpToBit(CurrentPos);
+ Record.clear();
+ unsigned Code = IndexCursor.readRecord(Entry.ID, Record);
+ assert(Code == bitc::METADATA_NAME);
+
+ // Read name of the named metadata.
+ SmallString<8> Name(Record.begin(), Record.end());
+ Code = IndexCursor.ReadCode();
+
+ // Named metadata comes in two parts; we expect the name to be followed
+ // by the node.
+ Record.clear();
+ unsigned NextBitCode = IndexCursor.readRecord(Code, Record);
+ assert(NextBitCode == bitc::METADATA_NAMED_NODE);
+ (void)NextBitCode;
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ NamedMDNode *NMD = TheModule.getOrInsertNamedMetadata(Name);
+ for (unsigned i = 0; i != Size; ++i) {
+ // FIXME: We could use a placeholder here; however, NamedMDNode takes
+ // MDNode as an operand and does not use the Metadata infrastructure. This
+ // is acknowledged by the 'TODO: Inherit from Metadata' in the NamedMDNode
+ // class definition.
+ MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
+ assert(MD && "Invalid record");
+ NMD->addOperand(MD);
+ }
+ break;
+ }
+ case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
+ // FIXME: we need to do this early because we don't materialize global
+ // values explicitly.
+ IndexCursor.JumpToBit(CurrentPos);
+ Record.clear();
+ IndexCursor.readRecord(Entry.ID, Record);
+ if (Record.size() % 2 == 0)
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return error("Invalid record");
+ if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
+ if (Error Err = parseGlobalObjectAttachment(
+ *GO, ArrayRef<uint64_t>(Record).slice(1)))
+ return std::move(Err);
+ break;
+ }
+ case bitc::METADATA_KIND:
+ case bitc::METADATA_STRING_OLD:
+ case bitc::METADATA_OLD_FN_NODE:
+ case bitc::METADATA_OLD_NODE:
+ case bitc::METADATA_VALUE:
+ case bitc::METADATA_DISTINCT_NODE:
+ case bitc::METADATA_NODE:
+ case bitc::METADATA_LOCATION:
+ case bitc::METADATA_GENERIC_DEBUG:
+ case bitc::METADATA_SUBRANGE:
+ case bitc::METADATA_ENUMERATOR:
+ case bitc::METADATA_BASIC_TYPE:
+ case bitc::METADATA_DERIVED_TYPE:
+ case bitc::METADATA_COMPOSITE_TYPE:
+ case bitc::METADATA_SUBROUTINE_TYPE:
+ case bitc::METADATA_MODULE:
+ case bitc::METADATA_FILE:
+ case bitc::METADATA_COMPILE_UNIT:
+ case bitc::METADATA_SUBPROGRAM:
+ case bitc::METADATA_LEXICAL_BLOCK:
+ case bitc::METADATA_LEXICAL_BLOCK_FILE:
+ case bitc::METADATA_NAMESPACE:
+ case bitc::METADATA_MACRO:
+ case bitc::METADATA_MACRO_FILE:
+ case bitc::METADATA_TEMPLATE_TYPE:
+ case bitc::METADATA_TEMPLATE_VALUE:
+ case bitc::METADATA_GLOBAL_VAR:
+ case bitc::METADATA_LOCAL_VAR:
+ case bitc::METADATA_EXPRESSION:
+ case bitc::METADATA_OBJC_PROPERTY:
+ case bitc::METADATA_IMPORTED_ENTITY:
+ case bitc::METADATA_GLOBAL_VAR_EXPR:
+ // We don't expect to see any of these; if we do, give up on
+ // lazy-loading and fall back.
+ MDStringRef.clear();
+ GlobalMetadataBitPosIndex.clear();
+ return false;
+ }
+ break;
+ }
+ }
+ }
+}
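// Editor's note on the "Delta unpack" loop above, with assumed values: each
// METADATA_INDEX element stores the distance from the previous record's bit
// position rather than an absolute offset. With BeginPos = 1000 and
// Record = {64, 32, 128}, GlobalMetadataBitPosIndex decodes to
// {1064, 1096, 1224}.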
+
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
/// module level metadata.
Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
if (!ModuleLevel && MetadataList.hasFwdRefs())
return error("Invalid metadata: fwd refs into function blocks");
+ // Record the entry position so that we can jump back here and efficiently
+ // skip the whole block in case we lazy-load.
+ auto EntryPos = Stream.GetCurrentBitNo();
+
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
return error("Invalid record");
- unsigned NextMetadataNo = MetadataList.size();
SmallVector<uint64_t, 64> Record;
-
PlaceholderQueue Placeholders;
+ // We lazy-load module-level metadata: we build an index for each record, and
+ // then load individual records as needed, starting with the named metadata.
+ if (ModuleLevel && IsImporting && MetadataList.empty() &&
+ !DisableLazyLoading) {
+ auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders);
+ if (!SuccessOrErr)
+ return SuccessOrErr.takeError();
+ if (SuccessOrErr.get()) {
+ // An index was successfully created and we will be able to load metadata
+ // on-demand.
+ MetadataList.resize(MDStringRef.size() +
+ GlobalMetadataBitPosIndex.size());
+
+ // Reading the named metadata created forward references and/or
+ // placeholders, which we flush here.
+ resolveForwardRefsAndPlaceholders(Placeholders);
+ upgradeCUSubprograms();
+ // Return to the beginning of the block, since it is easy to skip it
+ // entirely from there.
+ Stream.ReadBlockEnd(); // Pop the abbrev block context.
+ Stream.JumpToBit(EntryPos);
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ return Error::success();
+ }
+ // Couldn't load an index; fall back to loading the whole block "old-style".
+ }
+
+ unsigned NextMetadataNo = MetadataList.size();
+
// Read all the records.
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -467,16 +736,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- // Upgrade old-style CU <-> SP pointers to point from SP to CU.
- for (auto CU_SP : CUSubprograms)
- if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
- for (auto &Op : SPs->operands())
- if (auto *SP = dyn_cast_or_null<MDNode>(Op))
- SP->replaceOperandWith(7, CU_SP.first);
- CUSubprograms.clear();
-
- MetadataList.tryToResolveCycles();
- Placeholders.flush(MetadataList);
+ resolveForwardRefsAndPlaceholders(Placeholders);
+ upgradeCUSubprograms();
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -486,20 +747,86 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Read a record.
Record.clear();
StringRef Blob;
+ ++NumMDRecordLoaded;
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
- if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob,
- ModuleLevel, NextMetadataNo))
+ if (Error Err =
+ parseOneMetadata(Record, Code, Placeholders, Blob, NextMetadataNo))
return Err;
}
}
+MDString *MetadataLoader::MetadataLoaderImpl::lazyLoadOneMDString(unsigned ID) {
+ ++NumMDStringLoaded;
+ if (Metadata *MD = MetadataList.lookup(ID))
+ return cast<MDString>(MD);
+ auto MDS = MDString::get(Context, MDStringRef[ID]);
+ MetadataList.assignValue(MDS, ID);
+ return MDS;
+}
+
+void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
+ unsigned ID, PlaceholderQueue &Placeholders) {
+ assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size());
+ assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
+#ifndef NDEBUG
+ // Lookup first if the metadata hasn't already been loaded.
+ if (auto *MD = MetadataList.lookup(ID)) {
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ assert(N && N->isTemporary() && "Lazy loading an already loaded metadata");
+ }
+#endif
+ SmallVector<uint64_t, 64> Record;
+ StringRef Blob;
+ IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
+ auto Entry = IndexCursor.advanceSkippingSubblocks();
+ ++NumMDRecordLoaded;
+ unsigned Code = IndexCursor.readRecord(Entry.ID, Record, &Blob);
+ if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, ID))
+ report_fatal_error("Can't lazyload MD");
+}
+
+/// Ensure that all forward references and placeholders are resolved,
+/// iteratively lazy-loading metadata on-demand if needed.
+void MetadataLoader::MetadataLoaderImpl::resolveForwardRefsAndPlaceholders(
+ PlaceholderQueue &Placeholders) {
+ DenseSet<unsigned> Temporaries;
+ while (1) {
+ // Populate Temporaries with the placeholders that haven't been loaded yet.
+ Placeholders.getTemporaries(MetadataList, Temporaries);
+
+ // If we don't have any temporaries or forward references, we're done!
+ if (Temporaries.empty() && !MetadataList.hasFwdRefs())
+ break;
+
+ // First, load all the temporaries. This can add new placeholders or
+ // forward references.
+ for (auto ID : Temporaries)
+ lazyLoadOneMetadata(ID, Placeholders);
+ Temporaries.clear();
+
+ // Second, load the forward-references. This can also add new placeholders
+ // or forward references.
+ while (MetadataList.hasFwdRefs())
+ lazyLoadOneMetadata(MetadataList.getNextFwdRef(), Placeholders);
+ }
+ // At this point we don't have any forward references remaining, or
+ // temporaries that haven't been loaded. We can safely drop RAUW support
+ // and mark cycles as resolved.
+ MetadataList.tryToResolveCycles();
+
+ // Finally, everything is in place; we can replace the placeholder operands
+ // with the final nodes they refer to.
+ Placeholders.flush(MetadataList);
+}
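// Editor's note on the fixed point above (hypothetical chain): lazily
// loading node A may create a temporary for an unloaded node B that A
// references, and loading B may in turn add forward references, so the loop
// keeps draining Temporaries and the forward-reference list until both are
// empty before resolving cycles and flushing placeholders.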
+
Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SmallVectorImpl<uint64_t> &Record, unsigned Code,
- PlaceholderQueue &Placeholders, StringRef Blob, bool ModuleLevel,
- unsigned &NextMetadataNo) {
+ PlaceholderQueue &Placeholders, StringRef Blob, unsigned &NextMetadataNo) {
bool IsDistinct = false;
auto getMD = [&](unsigned ID) -> Metadata * {
+ if (ID < MDStringRef.size())
+ return lazyLoadOneMDString(ID);
if (!IsDistinct)
return MetadataList.getMetadataFwdRef(ID);
if (auto *MD = MetadataList.getMetadataIfResolved(ID))
@@ -519,7 +846,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
auto getMDString = [&](unsigned ID) -> MDString * {
// This requires that the ID is not really a forward reference. In
// particular, the MDString must already have been resolved.
- return cast_or_null<MDString>(getMDOrNull(ID));
+ auto MDS = getMDOrNull(ID);
+ return cast_or_null<MDString>(MDS);
};
// Support for old type refs.
@@ -539,6 +867,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.clear();
Code = Stream.ReadCode();
+ ++NumMDRecordLoaded;
unsigned NextBitCode = Stream.readRecord(Code, Record);
if (NextBitCode != bitc::METADATA_NAMED_NODE)
return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
@@ -1137,15 +1466,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// Test for upgrading !llvm.loop.
HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
-
+ ++NumMDStringLoaded;
Metadata *MD = MDString::get(Context, String);
MetadataList.assignValue(MD, NextMetadataNo++);
break;
}
- case bitc::METADATA_STRINGS:
- if (Error Err = parseMetadataStrings(Record, Blob, NextMetadataNo))
+ case bitc::METADATA_STRINGS: {
+ auto CreateNextMDString = [&](StringRef Str) {
+ ++NumMDStringLoaded;
+ MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
+ };
+ if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
return Err;
break;
+ }
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
if (Record.size() % 2 == 0)
return error("Invalid record");
@@ -1166,12 +1500,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
}
-#undef GET_OR_DISTINCT
return Error::success();
+#undef GET_OR_DISTINCT
}
Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
- ArrayRef<uint64_t> Record, StringRef Blob, unsigned &NextMetadataNo) {
+ ArrayRef<uint64_t> Record, StringRef Blob,
+ std::function<void(StringRef)> CallBack) {
// All the MDStrings in the block are emitted together in a single
// record. The strings are concatenated and stored in a blob along with
// their sizes.
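// Editor's example of that layout (assumed contents): with Record holding
// NumStrings = 3 plus the offset to the characters, the blob starts with
// three VBR6-encoded sizes {5, 2, 4} followed by the concatenated
// characters "helloofnone", which the loop below slices into "hello",
// "of", and "none".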
@@ -1197,8 +1532,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
- MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
- NextMetadataNo++);
+ CallBack(Strings.slice(0, Size));
Strings = Strings.drop_front(Size);
} while (--NumStrings);
@@ -1228,6 +1562,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
SmallVector<uint64_t, 64> Record;
+ PlaceholderQueue Placeholders;
+
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -1236,6 +1572,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
+ resolveForwardRefsAndPlaceholders(Placeholders);
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -1244,6 +1581,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
// Read a metadata attachment record.
Record.clear();
+ ++NumMDRecordLoaded;
switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: ignore.
break;
@@ -1268,7 +1606,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
if (I->second == LLVMContext::MD_tbaa && StripTBAA)
continue;
- Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]);
+ auto Idx = Record[i + 1];
+ if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) &&
+ !MetadataList.lookup(Idx))
+ // Load the attachment if it is in the lazy-loadable range and hasn't
+ // been loaded yet.
+ lazyLoadOneMetadata(Idx, Placeholders);
+
+ Metadata *Node = MetadataList.getMetadataFwdRef(Idx);
if (isa<LocalAsMetadata>(Node))
// Drop the attachment. This used to be legal, but there's no
// upgrade path.
@@ -1331,6 +1676,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
// Read a record.
Record.clear();
+ ++NumMDRecordLoaded;
unsigned Code = Stream.readRecord(Entry.ID, Record);
switch (Code) {
default: // Default behavior: ignore.
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index c10ba2399e71..ebb2022551f7 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -784,53 +784,53 @@ void ModuleBitcodeWriter::writeTypeTable() {
uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
// Abbrev for TYPE_CODE_POINTER.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
- unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_FUNCTION.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_ANON.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_NAME.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_NAMED.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_ARRAY.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit an entry count so the reader can reserve space.
TypeVals.push_back(TypeList.size());
@@ -971,9 +971,8 @@ static unsigned getEncodedLinkage(const GlobalValue &GV) {
static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;
- RawFlags |= Flags.NoRename; // bool
- RawFlags |= (Flags.IsNotViableToInline << 1);
- RawFlags |= (Flags.HasInlineAsmMaybeReferencingInternal << 2);
+ RawFlags |= Flags.NotEligibleToImport; // bool
+ RawFlags |= (Flags.LiveRoot << 1);
// Linkage doesn't need to be remapped at this time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
// account here as well.
@@ -1059,13 +1058,13 @@ void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
// updated when the real VST is written.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET));
// Blocks are 32-bit aligned, so we can use a 32-bit word offset to
// hold the real VST offset. Must use fixed instead of VBR as we don't
// know how many VBR chunks to reserve ahead of time.
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
- unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit the placeholder
uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0};
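// Editor's sketch (assumed helper, matching the comment above): once the
// real VST position is known, this 32-bit placeholder is overwritten in
// place with a word offset, e.g.
//   Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);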
@@ -1155,7 +1154,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
unsigned SimpleGVarAbbrev = 0;
if (!M.global_empty()) {
// Add an abbrev for common globals with no visibility or thread localness.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxGlobalType+1)));
@@ -1177,7 +1176,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(SectionMap.size()+1)));
// Don't bother emitting vis + thread local.
- SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+ SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// Emit the global variable information.
@@ -1285,11 +1284,11 @@ void ModuleBitcodeWriter::writeModuleInfo() {
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
- unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
for (const auto P : M.getSourceFileName())
Vals.push_back((unsigned char)P);
@@ -1360,14 +1359,14 @@ void ModuleBitcodeWriter::writeMDTuple(const MDTuple *N,
unsigned ModuleBitcodeWriter::createDILocationAbbrev() {
// Assume the column is usually under 128, and always output the inlined-at
// location (it's never more expensive than building an array size 1).
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
@@ -1389,7 +1388,7 @@ void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() {
// Assume the column is usually under 128, and always output the inlined-at
// location (it's never more expensive than building an array size 1).
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
@@ -1397,7 +1396,7 @@ unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeGenericDINode(const GenericDINode *N,
@@ -1790,11 +1789,11 @@ void ModuleBitcodeWriter::writeDIImportedEntity(
}
unsigned ModuleBitcodeWriter::createNamedMetadataAbbrev() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeNamedMetadata(
@@ -1819,12 +1818,12 @@ void ModuleBitcodeWriter::writeNamedMetadata(
}
unsigned ModuleBitcodeWriter::createMetadataStringsAbbrev() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
/// Write out a record for MDString.
@@ -1918,17 +1917,17 @@ void ModuleBitcodeWriter::writeModuleMetadata() {
MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
createGenericDINodeAbbrev();
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
- unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned OffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned IndexAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit MDStrings together upfront.
writeMetadataStrings(VE.getMDStrings(), Record);
@@ -2125,30 +2124,30 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
// If this is a constant pool for the module, emit module-specific abbrevs.
if (isGlobal) {
// Abbrev for CST_CODE_AGGREGATE.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
- AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+ AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_STRING.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- String8Abbrev = Stream.EmitAbbrev(Abbv);
+ String8Abbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_CSTRING.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- CString7Abbrev = Stream.EmitAbbrev(Abbv);
+ CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_CSTRING.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- CString6Abbrev = Stream.EmitAbbrev(Abbv);
+ CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv));
}
SmallVector<uint64_t, 64> Record;
@@ -2858,39 +2857,39 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned GUIDEntryAbbrev;
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// 8-bit fixed-width VST_CODE_FNENTRY function strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+ FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width VST_CODE_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
+ FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 6-bit char6 VST_CODE_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+ FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// FIXME: Change the name of this record as it is now used by
// the per-module index as well.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
- GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
+ GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// FIXME: Set up the abbrev, we know how many values there are!
@@ -2984,11 +2983,11 @@ void IndexBitcodeWriter::writeCombinedValueSymbolTable() {
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
- unsigned EntryAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
for (const auto &GVI : valueIds()) {
@@ -3121,7 +3120,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
Stream.EnterBlockInfoBlock();
{ // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3132,7 +3131,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // 7-bit fixed width VST_CODE_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3142,7 +3141,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_CODE_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3152,7 +3151,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_CODE_BBENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3165,7 +3164,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
{ // SETTYPE abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
VE.computeBitsRequiredForTypeIndicies()));
@@ -3175,7 +3174,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // INTEGER abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) !=
@@ -3184,7 +3183,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // CE_CAST abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
@@ -3196,7 +3195,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // NULL abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) !=
CONSTANTS_NULL_Abbrev)
@@ -3206,7 +3205,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
// FIXME: This should only use space for first class types!
{ // INST_LOAD abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@@ -3218,7 +3217,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
@@ -3228,7 +3227,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
@@ -3239,7 +3238,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_CAST abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@@ -3251,14 +3250,14 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_RET_VOID_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
@@ -3266,14 +3265,14 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_UNREACHABLE_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
{
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@@ -3296,38 +3295,38 @@ void IndexBitcodeWriter::writeModStrings() {
// TODO: See which abbrev sizes we actually need to emit
// 8-bit fixed-width MST_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv);
+ unsigned Abbrev8Bit = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width MST_ENTRY strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv);
+ unsigned Abbrev7Bit = Stream.EmitAbbrev(std::move(Abbv));
// 6-bit char6 MST_ENTRY strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
+ unsigned Abbrev6Bit = Stream.EmitAbbrev(std::move(Abbv));
  // Module Hash, a 160-bit SHA1, optionally emitted after each MST_CODE_ENTRY.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
- unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
+ unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
for (const auto &MPSE : Index.modulePaths()) {
@@ -3435,7 +3434,7 @@ void ModuleBitcodeWriter::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some records are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 2;
+static const uint64_t INDEX_VERSION = 3;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3450,7 +3449,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
}
// Abbrev for FS_PERMODULE.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
@@ -3459,10 +3458,10 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_PROFILE.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
@@ -3471,24 +3470,24 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_ALIAS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- unsigned FSAliasAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
@@ -3542,7 +3541,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
// Abbrev for FS_COMBINED.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@@ -3552,10 +3551,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_PROFILE.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@@ -3565,26 +3564,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_GLOBALVAR_INIT_REFS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_ALIAS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALIAS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- unsigned FSAliasAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
@@ -3702,19 +3701,19 @@ void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- auto StringAbbrev = Stream.EmitAbbrev(Abbv);
+ auto StringAbbrev = Stream.EmitAbbrev(std::move(Abbv));
writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- auto EpochAbbrev = Stream.EmitAbbrev(Abbv);
+ auto EpochAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 1> Vals = {bitc::BITCODE_CURRENT_EPOCH};
Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev);
Stream.ExitBlock();
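
A sketch of the ownership contract behind this mechanical change (abbreviation
ops elided): the stream now shares ownership of each abbreviation instead of
deleting a raw pointer, and moving the local shared_ptr into EmitAbbrev hands
over the caller's reference without an extra refcount bump.

    auto Abbv = std::make_shared<BitCodeAbbrev>();
    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
    unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbv));
    // Abbv is now empty; the BitstreamWriter keeps the abbreviation alive.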
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 0c79def87933..61149d9229b7 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -53,7 +53,8 @@ void ARMException::beginFunction(const MachineFunction *MF) {
if (MoveType == AsmPrinter::CFI_M_Debug) {
if (!hasEmittedCFISections) {
- Asm->OutStreamer->EmitCFISections(false, true);
+ if (Asm->needsOnlyDebugCFIMoves())
+ Asm->OutStreamer->EmitCFISections(false, true);
hasEmittedCFISections = true;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5f15ac1d503b..9f6caa95a9ed 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -108,7 +108,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
- LastMI(nullptr), LastFn(0), Counter(~0U) {
+ isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
DD = nullptr;
MMI = nullptr;
LI = nullptr;
@@ -264,6 +264,28 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ isCFIMoveForDebugging = true;
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ break;
+ for (auto &F: M.getFunctionList()) {
+ // If the module contains any function with unwind data,
+ // .eh_frame has to be emitted.
+ // Ignore functions that won't get emitted.
+ if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) {
+ isCFIMoveForDebugging = false;
+ break;
+ }
+ }
+ break;
+ default:
+ isCFIMoveForDebugging = false;
+ break;
+ }
+
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 20075e41977f..57864e4e4d4f 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -100,6 +100,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
SourceMgr SrcMgr;
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+
SrcMgrDiagInfo DiagInfo;
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index ef30e279aed2..e08306b001fb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -137,7 +137,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
return;
if (!hasEmittedCFISections) {
- if (Asm->needsCFIMoves() == AsmPrinter::CFI_M_Debug)
+ if (Asm->needsOnlyDebugCFIMoves())
Asm->OutStreamer->EmitCFISections(false, true);
hasEmittedCFISections = true;
}
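
For readers unfamiliar with the streamer call both exception printers now
share, a hedged gloss of its two flags:

    // MCStreamer::EmitCFISections(bool EH, bool Debug):
    // EH=false, Debug=true requests .debug_frame only and no .eh_frame,
    // which is exactly the case needsOnlyDebugCFIMoves() reports.
    Asm->OutStreamer->EmitCFISections(false, true);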
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index cf35afbc6e5f..89a042ffc477 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -125,8 +125,11 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
if (!MBB) {
- MBB = MF->CreateMachineBasicBlock();
+ MBB = MF->CreateMachineBasicBlock(&BB);
MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
}
return *MBB;
}
@@ -195,6 +198,45 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::translateSwitch(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // For now, just translate as a chain of conditional branches.
+ // FIXME: could we share most of the logic/code in
+ // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
+ // At first sight, it seems most of the logic in there is independent of
+  // SelectionDAG-specifics, and a lot of work went into optimizing switch
+  // lowering in there.
+
+ const SwitchInst &SwInst = cast<SwitchInst>(U);
+ const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
+
+ LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
+ for (auto &CaseIt : SwInst.cases()) {
+ const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
+ const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
+
+ MIRBuilder.buildBrCond(Tst, TrueBB);
+ CurBB.addSuccessor(&TrueBB);
+
+ MachineBasicBlock *FalseBB =
+ MF->CreateMachineBasicBlock(SwInst.getParent());
+ MF->push_back(FalseBB);
+ MIRBuilder.buildBr(*FalseBB);
+ CurBB.addSuccessor(FalseBB);
+
+ MIRBuilder.setMBB(*FalseBB);
+ }
+  // Handle the default case.
+ MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
+ MIRBuilder.buildBr(DefaultBB);
+ MIRBuilder.getMBB().addSuccessor(&DefaultBB);
+
+ return true;
+}
+
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
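
A rough C++ analogue of the chain translateSwitch builds (a sketch of the
control flow only, not of the emitted generic MIR):

    int switchAsBranchChain(int x) {
      if (x == 1)   // G_ICMP + G_BRCOND, then fall through to a fresh block
        return 10;
      if (x == 2)   // each case compares against the same condition vreg
        return 20;
      return -1;    // trailing unconditional branch to the default block
    }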
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index a6c93bc0f3d7..7d405dd92ac3 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -55,11 +55,10 @@ const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
unsigned NumRegBanks)
: RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
- DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx)
assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
- assert(!RegBanks[Idx]->isValid() &&
- "RegisterBank should be invalid before initialization");
- });
+#endif // NDEBUG
}
RegisterBankInfo::~RegisterBankInfo() {
@@ -70,13 +69,15 @@ RegisterBankInfo::~RegisterBankInfo() {
}
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
- DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
const RegisterBank &RegBank = getRegBank(Idx);
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
dbgs() << "Verify " << RegBank << '\n';
assert(RegBank.verify(TRI) && "RegBank is invalid");
- });
+ }
+#endif // NDEBUG
return true;
}
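
Why the change matters, loosely: LLVM's DEBUG() macro runs its body only when
debug output is enabled, so asserts wrapped in it are skipped in ordinary
asserts builds, while #ifndef NDEBUG code always runs there. A self-contained
stand-in (MY_DEBUG approximates DEBUG()):

    #include <cassert>
    static bool DebugFlag = false;      // i.e. -debug was not passed
    #define MY_DEBUG(X) do { if (DebugFlag) { X; } } while (0)

    void check(int *p) {
      MY_DEBUG(assert(p != nullptr));   // silently skipped here
    #ifndef NDEBUG
      assert(p != nullptr);             // fires in any asserts build
    #endif
    }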
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 0cac7b71e241..b9f3d86eabd8 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1495,16 +1495,18 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
- // Initialize liveins to the first BB. These are potentiall redefined by
- // predicated instructions.
Redefs.init(*TRI);
- Redefs.addLiveIns(CvtMBB);
- Redefs.addLiveIns(NextMBB);
-
- // Compute a set of registers which must not be killed by instructions in
- // BB1: This is everything live-in to BB2.
DontKill.init(*TRI);
- DontKill.addLiveIns(NextMBB);
+
+ if (MRI->tracksLiveness()) {
+    // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
+ // Compute a set of registers which must not be killed by instructions in
+ // BB1: This is everything live-in to BB2.
+ DontKill.addLiveIns(NextMBB);
+ }
if (CvtMBB.pred_size() > 1) {
BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
@@ -1602,8 +1604,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(*TRI);
- Redefs.addLiveIns(CvtMBB);
- Redefs.addLiveIns(NextMBB);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
+ }
DontKill.clear();
@@ -1766,8 +1770,10 @@ bool IfConverter::IfConvertDiamondCommon(
// instructions. We start with BB1 live-ins so we have the live-out regs
// after tracking the BB1 instructions.
Redefs.init(*TRI);
- Redefs.addLiveIns(MBB1);
- Redefs.addLiveIns(MBB2);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveIns(MBB1);
+ Redefs.addLiveIns(MBB2);
+ }
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions
@@ -1792,12 +1798,14 @@ bool IfConverter::IfConvertDiamondCommon(
// This is everything used+live in BB2 after the duplicated instructions. We
// can compute this set by simulating liveness backwards from the end of BB2.
DontKill.init(*TRI);
- for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
- DontKill.stepBackward(MI);
+ if (MRI->tracksLiveness()) {
+ for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
+ DontKill.stepBackward(MI);
- for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
- Redefs.stepForward(MI, IgnoredClobbers);
+ for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
+ Redefs.stepForward(MI, Dummy);
+ }
}
BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
MBB2.erase(MBB2.begin(), DI2);
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index eb13d2d3ec0c..db87092177ca 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -488,16 +488,16 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
}
// Print the live in registers.
- const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- if (!MBB.livein_empty()) {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ if (MRI.tracksLiveness() && !MBB.livein_empty()) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
OS.indent(2) << "liveins: ";
bool First = true;
for (const auto &LI : MBB.liveins()) {
if (!First)
OS << ", ";
First = false;
- printReg(LI.PhysReg, OS, TRI);
+ printReg(LI.PhysReg, OS, &TRI);
if (!LI.LaneMask.all())
OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 549424d257fe..3869f976854d 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -286,7 +286,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
- for (const auto &LI : make_range(livein_begin(), livein_end())) {
+ for (const auto &LI : LiveIns) {
OS << ' ' << PrintReg(LI.PhysReg, TRI);
if (!LI.LaneMask.all())
OS << ':' << PrintLaneMask(LI.LaneMask);
@@ -1292,3 +1292,10 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
void MachineBasicBlock::clearLiveIns() {
LiveIns.clear();
}
+
+MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
+ assert(getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness) &&
+ "Liveness information is accurate");
+ return LiveIns.begin();
+}
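
Caller-side implication, sketched (use() is a hypothetical consumer): since
livein_begin() now asserts the TracksLiveness property, traversals should be
guarded the same way the verifier and printer hunks above are.

    if (MF.getProperties().hasProperty(
            MachineFunctionProperties::Property::TracksLiveness))
      for (const auto &LI : MBB.liveins())
        use(LI.PhysReg); // use() is a hypothetical consumer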
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 426a4666c649..a98139f9e5af 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -566,7 +566,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
if (!MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs)) {
+ MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
@@ -741,14 +741,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
- for (const auto &LI : MBB->liveins()) {
- if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
- report("MBB live-in list contains non-physical register", MBB);
- continue;
+ if (MRI->tracksLiveness()) {
+ for (const auto &LI : MBB->liveins()) {
+ if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
}
regsLiveInButUnused = regsLive;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index de1c35caa1a0..fdf741fd58f7 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -48,11 +48,6 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
- // It is not possible to use the register scavenger after late optimization
- // passes that don't preserve accurate liveness information.
- assert(MRI->tracksLiveness() &&
- "Cannot use register scavenger with inaccurate liveness");
-
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index cb803585282f..a07bd8f83546 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -7339,19 +7340,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
if (!Range)
return Op;
- Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
- if (!Lo->isNullValue())
+ ConstantRange CR = getConstantRangeFromMetadata(*Range);
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
return Op;
- Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
- unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
+ APInt Lo = CR.getUnsignedMin();
+ if (!Lo.isMinValue())
+ return Op;
+
+ APInt Hi = CR.getUnsignedMax();
+ unsigned Bits = Hi.getActiveBits();
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
- SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
- Op, DAG.getValueType(SmallVT));
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
+ DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
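
A worked instance with assumed values: for metadata equivalent to [0, 16) both
the old and new computations yield 4 bits, and the ConstantRange form extends
naturally to bounds like [0, 17), where the unsigned max 16 needs 5 active
bits.

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    unsigned assertZExtBitsFor() {
      ConstantRange CR(APInt(32, 0), APInt(32, 17)); // models !range [0, 17)
      if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
        return 32;                                   // cannot narrow these
      // Lo is 0 (isMinValue) and getUnsignedMax() == 16, whose 5 active bits
      // mean the value is wrapped in AssertZext with an i5 value type.
      return CR.getUnsignedMax().getActiveBits();
    }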
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index deec16330224..2aac3474654f 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -299,11 +299,8 @@ DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const {
Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end());
}
- DWARFDie Child = getFirstChild();
- while (Child) {
+ for (auto Child: children())
Child.collectChildrenAddressRanges(Ranges);
- Child = Child.getSibling();
- }
}
bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index e6c9764f1133..2bbcb25275e4 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -468,6 +468,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.HandleInt = Flags.handle_int;
Options.HandleSegv = Flags.handle_segv;
Options.HandleTerm = Flags.handle_term;
+ Options.HandleXfsz = Flags.handle_xfsz;
SetSignalHandler(Options);
if (Flags.minimize_crash_internal_step)
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 08eaad9856be..22aad353acec 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -91,6 +91,7 @@ FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.")
FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.")
FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.")
FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.")
+FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.")
FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; "
"if 2, close stderr; if 3, close both. "
"Be careful, this will also close e.g. asan's stderr/stdout.")
diff --git a/lib/Fuzzer/FuzzerIO.h b/lib/Fuzzer/FuzzerIO.h
index 741fecf415b0..15bfd3d34727 100644
--- a/lib/Fuzzer/FuzzerIO.h
+++ b/lib/Fuzzer/FuzzerIO.h
@@ -37,6 +37,9 @@ std::string DirPlusFile(const std::string &DirPath,
// Returns the name of the dir, similar to the 'dirname' utility.
std::string DirName(const std::string &FileName);
+// Returns path to a TmpDir.
+std::string TmpDir();
+
void DupAndCloseStderr();
void CloseStdout();
diff --git a/lib/Fuzzer/FuzzerIOPosix.cpp b/lib/Fuzzer/FuzzerIOPosix.cpp
index 720bc1304594..6d8edf6ff538 100644
--- a/lib/Fuzzer/FuzzerIOPosix.cpp
+++ b/lib/Fuzzer/FuzzerIOPosix.cpp
@@ -83,6 +83,12 @@ std::string DirName(const std::string &FileName) {
return Res;
}
+std::string TmpDir() {
+ if (auto Env = getenv("TMPDIR"))
+ return Env;
+ return "/tmp";
+}
+
} // namespace fuzzer
#endif // LIBFUZZER_POSIX
diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp
index a4738eb9dfe5..056f0721a336 100644
--- a/lib/Fuzzer/FuzzerIOWindows.cpp
+++ b/lib/Fuzzer/FuzzerIOWindows.cpp
@@ -277,6 +277,8 @@ std::string DirName(const std::string &FileName) {
return FileName.substr(0, LocationLen + DirLen);
}
+std::string TmpDir() { return "TODO: implement TmpDir"; }
+
} // namespace fuzzer
#endif // LIBFUZZER_WINDOWS
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index c041706092db..0d2c7a78aca8 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -82,6 +82,7 @@ public:
static void StaticAlarmCallback();
static void StaticCrashSignalCallback();
static void StaticInterruptCallback();
+ static void StaticFileSizeExceedCallback();
void ExecuteCallback(const uint8_t *Data, size_t Size);
size_t RunOne(const uint8_t *Data, size_t Size);
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 1336f5e4aeeb..9f49d1557990 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -266,6 +266,11 @@ void Fuzzer::StaticInterruptCallback() {
F->InterruptCallback();
}
+void Fuzzer::StaticFileSizeExceedCallback() {
+ Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid());
+ exit(1);
+}
+
void Fuzzer::CrashCallback() {
Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid());
if (EF->__sanitizer_print_stack_trace)
diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp
index 84660e0fe53f..9e559115680c 100644
--- a/lib/Fuzzer/FuzzerMerge.cpp
+++ b/lib/Fuzzer/FuzzerMerge.cpp
@@ -220,8 +220,8 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
ListFilesInDirRecursive(Corpora[i], nullptr, &AllFiles, /*TopDir*/true);
Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n",
AllFiles.size(), NumFilesInFirstCorpus);
- std::string CFPath =
- "libFuzzerTemp." + std::to_string(GetPid()) + ".txt";
+ auto CFPath = DirPlusFile(TmpDir(),
+ "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
// Write the control file.
RemoveFile(CFPath);
std::ofstream ControlFile(CFPath);
@@ -229,6 +229,11 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
ControlFile << NumFilesInFirstCorpus << "\n";
for (auto &Path: AllFiles)
ControlFile << Path << "\n";
+ if (!ControlFile) {
+ Printf("MERGE-OUTER: failed to write to the control file: %s\n",
+ CFPath.c_str());
+ exit(1);
+ }
ControlFile.close();
  // Execute the inner process until it passes.
@@ -246,6 +251,9 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
// Read the control file and do the merge.
Merger M;
std::ifstream IF(CFPath);
+ IF.seekg(0, IF.end);
+ Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg());
+ IF.seekg(0, IF.beg);
M.ParseOrExit(IF, true);
IF.close();
std::vector<std::string> NewFiles;
diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h
index cb702d285200..6f72205600b9 100644
--- a/lib/Fuzzer/FuzzerOptions.h
+++ b/lib/Fuzzer/FuzzerOptions.h
@@ -62,6 +62,7 @@ struct FuzzingOptions {
bool HandleInt = false;
bool HandleSegv = false;
bool HandleTerm = false;
+ bool HandleXfsz = false;
};
} // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index be62a6624b27..2ad9702fab0e 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -46,10 +46,6 @@ public:
void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
const uint8_t *Data2);
- void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
- size_t NumCases, uint64_t *Cases);
- int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
- size_t DataSize);
int TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData, size_t DataSize);
@@ -147,29 +143,6 @@ public:
size_t AutoDictAdds = 0;
};
-int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
- size_t DataSize) {
- if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
- ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
- const uint8_t *UnitData;
- auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
- int Res = 0;
- const uint8_t *Beg = UnitData;
- const uint8_t *End = Beg + UnitSize;
- for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
- Cur = (uint8_t *)SearchMemory(Cur, End - Cur, &PresentData, DataSize);
- if (!Cur)
- break;
- size_t Pos = Cur - Beg;
- assert(Pos < UnitSize);
- AddMutation(Pos, DataSize, DesiredData);
- AddMutation(Pos, DataSize, DesiredData + 1);
- AddMutation(Pos, DataSize, DesiredData - 1);
- Res++;
- }
- return Res;
-}
-
int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData,
size_t DataSize) {
@@ -206,26 +179,6 @@ void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
}
}
-void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
- uint64_t Val, size_t NumCases,
- uint64_t *Cases) {
- if (F->InFuzzingThread()) return;
- size_t ValSize = ValSizeInBits / 8;
- bool TryShort = IsTwoByteData(Val);
- for (size_t i = 0; i < NumCases; i++)
- TryShort &= IsTwoByteData(Cases[i]);
-
- if (Options.Verbosity >= 3)
- Printf("TraceSwitch: %p %zd # %zd; TryShort %d\n", PC, Val, NumCases,
- TryShort);
-
- for (size_t i = 0; i < NumCases; i++) {
- TryToAddDesiredData(Val, Cases[i], ValSize);
- if (TryShort)
- TryToAddDesiredData(Val, Cases[i], 2);
- }
-}
-
static TraceState *TS;
void Fuzzer::StartTraceRecording() {
diff --git a/lib/Fuzzer/FuzzerUtilPosix.cpp b/lib/Fuzzer/FuzzerUtilPosix.cpp
index 8b484b8effa4..e8d48dc81a3b 100644
--- a/lib/Fuzzer/FuzzerUtilPosix.cpp
+++ b/lib/Fuzzer/FuzzerUtilPosix.cpp
@@ -41,6 +41,10 @@ static void InterruptHandler(int, siginfo_t *, void *) {
Fuzzer::StaticInterruptCallback();
}
+static void FileSizeExceedHandler(int, siginfo_t *, void *) {
+ Fuzzer::StaticFileSizeExceedCallback();
+}
+
static void SetSigaction(int signum,
void (*callback)(int, siginfo_t *, void *)) {
struct sigaction sigact;
@@ -80,6 +84,8 @@ void SetSignalHandler(const FuzzingOptions& Options) {
SetSigaction(SIGILL, CrashHandler);
if (Options.HandleFpe)
SetSigaction(SIGFPE, CrashHandler);
+ if (Options.HandleXfsz)
+ SetSigaction(SIGXFSZ, FileSizeExceedHandler);
}
void SleepSeconds(int Seconds) {
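
Background on the new signal, hedged: a write that would push a file past the
process's RLIMIT_FSIZE makes the kernel deliver SIGXFSZ, whose default action
terminates the process, so libFuzzer installs a reporter instead. A minimal
POSIX sketch of the same installation pattern:

    #include <signal.h>
    #include <cstdio>
    #include <cstdlib>

    static void FileSizeExceedHandler(int, siginfo_t *, void *) {
      // Not strictly async-signal-safe; acceptable for a fatal report.
      fprintf(stderr, "file size exceeded\n");
      _Exit(1);
    }

    int main() {
      struct sigaction SigAct = {};
      SigAct.sa_sigaction = FileSizeExceedHandler;
      SigAct.sa_flags = SA_SIGINFO;
      sigaction(SIGXFSZ, &SigAct, nullptr);
      // Any write past RLIMIT_FSIZE now reports instead of silently dying.
    }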
diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp
index 64adb7cd1380..3ca1f2c8f562 100644
--- a/lib/Fuzzer/FuzzerUtilWindows.cpp
+++ b/lib/Fuzzer/FuzzerUtilWindows.cpp
@@ -58,6 +58,7 @@ LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
if (HandlerOpt->HandleFpe)
Fuzzer::StaticCrashSignalCallback();
break;
+ // TODO: handle (Options.HandleXfsz)
}
return EXCEPTION_CONTINUE_SEARCH;
}
diff --git a/lib/Fuzzer/test/merge.test b/lib/Fuzzer/test/merge.test
index 1f1810eb0195..5c7d30e41caa 100644
--- a/lib/Fuzzer/test/merge.test
+++ b/lib/Fuzzer/test/merge.test
@@ -44,3 +44,11 @@ MERGE_WITH_CRASH: MERGE-OUTER: 3 new files
# Check that we actually limit the size with max_len
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 -max_len=5 2>&1 | FileCheck %s --check-prefix=MERGE_LEN5
MERGE_LEN5: MERGE-OUTER: succesfull in 1 attempt(s)
+
+# Check that we honor TMPDIR
+RUN: TMPDIR=DIR_DOES_NOT_EXIST not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=TMPDIR
+TMPDIR: MERGE-OUTER: failed to write to the control file: DIR_DOES_NOT_EXIST/libFuzzerTemp
+
+# Check that we can report an error if file size exceeded
+RUN: (ulimit -f 1; not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ)
+SIGXFSZ: ERROR: libFuzzer: file size exceeded
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 42b3a344352b..e3e2f9f806c8 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -337,12 +337,21 @@ void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
if (Res.Prevailing)
GlobalRes.IRName = GV->getName();
}
+ // Set the partition to external if we know it is used elsewhere, e.g.
+  // it is visible to a regular object, is referenced from llvm.compiler.used,
+ // or was already recorded as being referenced from a different partition.
if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
(GlobalRes.Partition != GlobalResolution::Unknown &&
- GlobalRes.Partition != Partition))
+ GlobalRes.Partition != Partition)) {
GlobalRes.Partition = GlobalResolution::External;
- else
+ } else
+ // First recorded reference, save the current partition.
GlobalRes.Partition = Partition;
+
+ // Flag as visible outside of ThinLTO if visible from a regular object or
+ // if this is a reference in the regular LTO partition.
+ GlobalRes.VisibleOutsideThinLTO |=
+ (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO));
}
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
@@ -848,6 +857,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+ // Compute "dead" symbols, we don't want to import/export these!
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (auto &Res : GlobalResolutions) {
+ if (Res.second.VisibleOutsideThinLTO &&
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to preserve any ThinLTO copies.
+ !Res.second.IRName.empty())
+ GUIDPreservedSymbols.insert(GlobalValue::getGUID(Res.second.IRName));
+ }
+
+ auto DeadSymbols =
+ computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
+
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
@@ -856,12 +878,21 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (Conf.OptLevel > 0) {
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- ImportLists, ExportLists);
+ ImportLists, ExportLists, &DeadSymbols);
std::set<GlobalValue::GUID> ExportedGUIDs;
for (auto &Res : GlobalResolutions) {
- if (!Res.second.IRName.empty() &&
- Res.second.Partition == GlobalResolution::External)
+ // First check if the symbol was flagged as having external references.
+ if (Res.second.Partition != GlobalResolution::External)
+ continue;
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to mark as exported from a ThinLTO
+ // partition (and we can't get the GUID).
+ if (Res.second.IRName.empty())
+ continue;
+ auto GUID = GlobalValue::getGUID(Res.second.IRName);
+ // Mark exported unless index-based analysis determined it to be dead.
+ if (!DeadSymbols.count(GUID))
ExportedGUIDs.insert(GlobalValue::getGUID(Res.second.IRName));
}
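
Taken together, the thin-link sequence this file now follows is roughly the
sketch below (names from the patch; collectPreserved() is a hypothetical
stand-in for the GlobalResolutions loop above):

    DenseSet<GlobalValue::GUID> GUIDPreservedSymbols = collectPreserved();
    auto DeadSymbols =
        computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
    // Import/export computation and the exported-GUID filter both consult
    // the dead set, so dead symbols are neither imported nor exported.
    ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                             ImportLists, ExportLists, &DeadSymbols);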
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 880dc3dfae98..66ffe6db29d6 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -581,11 +581,18 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
+ ExportLists, &DeadSymbols);
// Resolve LinkOnce/Weak symbols.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
@@ -594,10 +601,6 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
thinLTOResolveWeakForLinkerModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
- // Convert the preserved symbols set from string to GUID
- auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
-
// Promote the exported values in the index, so that they are promoted
// in the module.
auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
@@ -623,11 +626,18 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
+ ExportLists, &DeadSymbols);
auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
@@ -697,11 +707,14 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
+ ExportLists, &DeadSymbols);
auto &ExportList = ExportLists[ModuleIdentifier];
// Be friendly and don't nuke totally the module when the client didn't
@@ -836,17 +849,20 @@ void ThinLTOCodeGenerator::run() {
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID, this is needed for
+ // computing the caching hash and the internalization.
+ auto GUIDPreservedSymbols =
+ computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
-
- // Convert the preserved symbols set from string to GUID, this is needed for
- // computing the caching hash and the internalization.
- auto GUIDPreservedSymbols =
- computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ ExportLists, &DeadSymbols);
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 0c0b498f1375..fb8b45166a41 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -205,7 +205,7 @@ APInt& APInt::operator++() {
/// This function subtracts a single "digit" (64-bit word), y, from
/// the multi-digit integer array, x[], propagating the borrowed 1 value until
-/// no further borrowing is neeeded or it runs out of "digits" in x. The result
+/// no further borrowing is needed or it runs out of "digits" in x. The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
/// In other words, if y > x then this function returns 1, otherwise 0.
/// @returns the borrow out of the subtraction
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index ca344b1dc058..15418ad2fd06 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -90,6 +90,7 @@ add_llvm_library(LLVMSupport
StringSaver.cpp
StringRef.cpp
SystemUtils.cpp
+ TarWriter.cpp
TargetParser.cpp
ThreadPool.cpp
Timer.cpp
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 49d0ed55a716..8a09589aa884 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -474,15 +474,25 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
break;
// Skylake:
- case 0x4e:
- *Type = INTEL_COREI7; // "skylake-avx512"
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
- break;
- case 0x5e:
+ case 0x4e: // Skylake mobile
+ case 0x5e: // Skylake desktop
+ case 0x8e: // Kaby Lake mobile
+ case 0x9e: // Kaby Lake desktop
*Type = INTEL_COREI7; // "skylake"
*Subtype = INTEL_COREI7_SKYLAKE;
break;
+ // Skylake Xeon:
+ case 0x55:
+ *Type = INTEL_COREI7;
+ // Check that we really have AVX512
+ if (Features & (1 << FEATURE_AVX512)) {
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ } else {
+ *Subtype = INTEL_COREI7_SKYLAKE; // "skylake"
+ }
+ break;
+
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
diff --git a/lib/Support/TarWriter.cpp b/lib/Support/TarWriter.cpp
new file mode 100644
index 000000000000..5fc17d276377
--- /dev/null
+++ b/lib/Support/TarWriter.cpp
@@ -0,0 +1,166 @@
+//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The TarWriter class creates tar archive files.
+//
+// I put emphasis on simplicity over comprehensiveness when implementing this
+// class because we don't need a full-fledged archive file generator in LLVM
+// at the moment.
+//
+// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
+// are stored using the PAX extension. The PAX header is standardized in
+// POSIX.1-2001.
+//
+// The struct definition of UstarHeader is copied from
+// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TarWriter.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+// Each file in an archive must be aligned to this block size.
+static const int BlockSize = 512;
+
+struct UstarHeader {
+ char Name[100];
+ char Mode[8];
+ char Uid[8];
+ char Gid[8];
+ char Size[12];
+ char Mtime[12];
+ char Checksum[8];
+ char TypeFlag;
+ char Linkname[100];
+ char Magic[6];
+ char Version[2];
+ char Uname[32];
+ char Gname[32];
+ char DevMajor[8];
+ char DevMinor[8];
+ char Prefix[155];
+ char Pad[12];
+};
+static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
+
+// A PAX attribute is in the form of "<length> <key>=<value>\n"
+// where <length> is the length of the entire string including
+// the length field itself. An example string is this.
+//
+// 25 ctime=1084839148.1212\n
+//
+// This function creates such a string.
+static std::string formatPax(StringRef Key, StringRef Val) {
+ int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n"
+
+ // We need to compute total size twice because appending
+ // a length field could change total size by one.
+ int Total = Len + Twine(Len).str().size();
+ Total = Len + Twine(Total).str().size();
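+  // Example: Key="a", Val="bcdef" gives Len = 1 + 5 + 3 = 9. The first
+  // pass computes 9 + strlen("9") = 10; the second, 9 + strlen("10") = 11,
+  // and "11 a=bcdef\n" is indeed 11 bytes, counting the length field.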
+ return (Twine(Total) + " " + Key + "=" + Val + "\n").str();
+}
+
+// Headers in tar files must be aligned to 512-byte boundaries.
+// This function advances the current file position to the next boundary.
+static void pad(raw_fd_ostream &OS) {
+ uint64_t Pos = OS.tell();
+ OS.seek(alignTo(Pos, BlockSize));
+}
+
+// Computes a checksum for a tar header.
+static void computeChecksum(UstarHeader &Hdr) {
+  // Before computing a checksum, the checksum field must be
+  // filled with space characters.
+ memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum));
+
+ // Compute a checksum and set it to the checksum field.
+ unsigned Chksum = 0;
+ for (size_t I = 0; I < sizeof(Hdr); ++I)
+ Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I];
+ snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum);
+}
+
+// Create a tar header and write it to a given output stream.
+static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
+ // A PAX header consists of a 512-byte header followed
+ // by key-value strings. First, create key-value strings.
+ std::string PaxAttr = formatPax("path", Path);
+
+ // Create a 512-byte header.
+ UstarHeader Hdr = {};
+ snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size());
+ Hdr.TypeFlag = 'x'; // PAX magic
+ memcpy(Hdr.Magic, "ustar", 6); // Ustar magic
+ computeChecksum(Hdr);
+
+ // Write them down.
+ OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
+ OS << PaxAttr;
+ pad(OS);
+}
+
+// The PAX header is an extended format, so a PAX header needs
+// to be followed by a "real" header.
+static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) {
+ UstarHeader Hdr = {};
+ memcpy(Hdr.Name, Path.data(), Path.size());
+ memcpy(Hdr.Mode, "0000664", 8);
+ snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
+ memcpy(Hdr.Magic, "ustar", 6);
+ computeChecksum(Hdr);
+ OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
+}
+
+// We want to use '/' as a path separator even on Windows.
+// This function canonicalizes a given path.
+static std::string canonicalize(std::string S) {
+#ifdef LLVM_ON_WIN32
+ std::replace(S.begin(), S.end(), '\\', '/');
+#endif
+ return S;
+}
+
+// Creates a TarWriter instance and returns it.
+Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
+ StringRef BaseDir) {
+ int FD;
+  if (std::error_code EC =
+          sys::fs::openFileForWrite(OutputPath, FD, sys::fs::F_None))
+ return make_error<StringError>("cannot open " + OutputPath, EC);
+ return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir));
+}
+
+TarWriter::TarWriter(int FD, StringRef BaseDir)
+ : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {}
+
+// Append a given file to an archive.
+void TarWriter::append(StringRef Path, StringRef Data) {
+ // Write Path and Data.
+  // Append an explicit NUL; a "\0" string literal would append nothing here.
+  std::string S = BaseDir + "/" + canonicalize(Path) + std::string(1, '\0');
+ if (S.size() <= sizeof(UstarHeader::Name)) {
+ writeUstarHeader(OS, S, Data.size());
+ } else {
+ writePaxHeader(OS, S);
+ writeUstarHeader(OS, "", Data.size());
+ }
+
+ OS << Data;
+ pad(OS);
+
+ // POSIX requires tar archives end with two null blocks.
+ // Here, we write the terminator and then seek back, so that
+ // the file being output is terminated correctly at any moment.
+ uint64_t Pos = OS.tell();
+ OS << std::string(BlockSize * 2, '\0');
+ OS.seek(Pos);
+ OS.flush();
+}
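
A hedged usage sketch of the new interface ("repro.tar", "base-dir", "input.o"
and ObjectContents are invented for illustration):

    auto TarOrErr = TarWriter::create("repro.tar", "base-dir");
    if (!TarOrErr)
      report_fatal_error(toString(TarOrErr.takeError()));
    std::unique_ptr<TarWriter> Tar = std::move(*TarOrErr);
    Tar->append("input.o", ObjectContents); // archived as base-dir/input.o
    // The output is a valid tar at every point: append() rewrites the two
    // terminating NUL blocks after each member, then seeks back.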
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 3750d7f4c09d..9752b70644c6 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -48,7 +48,7 @@
// _Unwind_Backtrace function, but on FreeBSD the configure test passes
// despite the function not existing, and on Android, <unwind.h> conflicts
// with <link.h>.
-#if defined(__GLIBC__) || defined(__APPLE__)
+#ifdef __GLIBC__
#include <unwind.h>
#else
#undef HAVE__UNWIND_BACKTRACE
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 7666011f75b6..17aafa0c3d6e 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -110,72 +110,34 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-collect-loh"
-static cl::opt<bool>
-PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
- cl::desc("Restrict analysis to registers invovled"
- " in LOHs"),
- cl::init(true));
-
-static cl::opt<bool>
-BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
- cl::desc("Restrict analysis at basic block scope"),
- cl::init(true));
-
STATISTIC(NumADRPSimpleCandidate,
          "Number of simplifiable ADRP dominated by another");
-#ifndef NDEBUG
-STATISTIC(NumADRPComplexCandidate2,
- "Number of simplifiable ADRP reachable by 2 defs");
-STATISTIC(NumADRPComplexCandidate3,
- "Number of simplifiable ADRP reachable by 3 defs");
-STATISTIC(NumADRPComplexCandidateOther,
- "Number of simplifiable ADRP reachable by 4 or more defs");
-STATISTIC(NumADDToSTRWithImm,
- "Number of simplifiable STR with imm reachable by ADD");
-STATISTIC(NumLDRToSTRWithImm,
- "Number of simplifiable STR with imm reachable by LDR");
STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
-STATISTIC(NumADDToLDRWithImm,
- "Number of simplifiable LDR with imm reachable by ADD");
-STATISTIC(NumLDRToLDRWithImm,
- "Number of simplifiable LDR with imm reachable by LDR");
STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
-#endif // NDEBUG
STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
-#ifndef NDEBUG
-STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
-STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
-STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
-STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
-#endif // NDEBUG
STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
-STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
namespace {
+
struct AArch64CollectLOH : public MachineFunctionPass {
static char ID;
- AArch64CollectLOH() : MachineFunctionPass(ID) {
- initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
- }
+ AArch64CollectLOH() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -187,351 +149,57 @@ struct AArch64CollectLOH : public MachineFunctionPass {
StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesAll();
}
-
-private:
};
-/// A set of MachineInstruction.
-typedef SetVector<const MachineInstr *> SetOfMachineInstr;
-/// Map a basic block to a set of instructions per register.
-/// This is used to represent the exposed uses of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
- std::unique_ptr<SetOfMachineInstr[]>>
-BlockToSetOfInstrsPerColor;
-/// Map a basic block to an instruction per register.
-/// This is used to represent the live-out definitions of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
- std::unique_ptr<const MachineInstr *[]>>
-BlockToInstrPerColor;
-/// Map an instruction to a set of instructions. Used to represent the
-/// mapping def to reachable uses or use to definitions.
-typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
-/// Map a basic block to a BitVector.
-/// This is used to record the kill registers per basic block.
-typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
-
-/// Map a register to a dense id.
-typedef DenseMap<unsigned, unsigned> MapRegToId;
-/// Map a dense id to a register. Used for debug purposes.
-typedef SmallVector<unsigned, 32> MapIdToReg;
-} // end anonymous namespace.
-
char AArch64CollectLOH::ID = 0;
-INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
- AARCH64_COLLECT_LOH_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
- AARCH64_COLLECT_LOH_NAME, false, false)
-
-/// Given a couple (MBB, reg) get the corresponding set of instruction from
-/// the given "sets".
-/// If this couple does not reference any set, an empty set is added to "sets"
-/// for this couple and returned.
-/// \param nbRegs is used internally allocate some memory. It must be consistent
-/// with the way sets is used.
-static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
- const MachineBasicBlock &MBB, unsigned reg,
- unsigned nbRegs) {
- SetOfMachineInstr *result;
- BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
- if (it != sets.end())
- result = it->second.get();
- else
- result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
-
- return result[reg];
-}
-
-/// Given a couple (reg, MI) get the corresponding set of instructions from the
-/// the given "sets".
-/// This is used to get the uses record in sets of a definition identified by
-/// MI and reg, i.e., MI defines reg.
-/// If the couple does not reference anything, an empty set is added to
-/// "sets[reg]".
-/// \pre set[reg] is valid.
-static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
- const MachineInstr &MI) {
- return sets[reg][&MI];
-}
-
-/// Same as getUses but does not modify the input map: sets.
-/// \return NULL if the couple (reg, MI) is not in sets.
-static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
- const MachineInstr &MI) {
- InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
- if (Res != sets[reg].end())
- return &(Res->second);
- return nullptr;
-}
-
-/// Initialize the reaching definition algorithm:
-/// For each basic block BB in MF, record:
-/// - its kill set.
-/// - its reachable uses (uses that are exposed to BB's predecessors).
-/// - its the generated definitions.
-/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
-/// the list of uses of exposed defintions.
-/// \param ADRPMode specifies to only consider ADRP instructions for generated
-/// definition. It also consider definitions of ADRP instructions as uses and
-/// ignore other uses. The ADRPMode is used to collect the information for LHO
-/// that involve ADRP operation only.
-static void initReachingDef(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
- BlockToSetOfInstrsPerColor &ReachableUses,
- const MapRegToId &RegToId,
- const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned NbReg = RegToId.size();
-
- for (const MachineBasicBlock &MBB : MF) {
- auto &BBGen = Gen[&MBB];
- BBGen = make_unique<const MachineInstr *[]>(NbReg);
- std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
-
- BitVector &BBKillSet = Kill[&MBB];
- BBKillSet.resize(NbReg);
- for (const MachineInstr &MI : MBB) {
- bool IsADRP = MI.getOpcode() == AArch64::ADRP;
-
- // Process uses first.
- if (IsADRP || !ADRPMode)
- for (const MachineOperand &MO : MI.operands()) {
- // Treat ADRP def as use, as the goal of the analysis is to find
- // ADRP defs reached by other ADRP defs.
- if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
- (ADRPMode && (!IsADRP || !MO.isDef())))
- continue;
- unsigned CurReg = MO.getReg();
- MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
- if (ItCurRegId == RegToId.end())
- continue;
- CurReg = ItCurRegId->second;
-
- // if CurReg has not been defined, this use is reachable.
- if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
- getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
- // current basic block definition for this color, if any, is in Gen.
- if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
- }
-
- // Process clobbers.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- // Clobbers kill the related colors.
- const uint32_t *PreservedRegs = MO.getRegMask();
-
- // Set generated regs.
- for (const auto &Entry : RegToId) {
- unsigned Reg = Entry.second;
- // Use the global register ID when querying APIs external to this
- // pass.
- if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
- // Do not register clobbered definition for no ADRP.
- // This definition is not used anyway (otherwise register
- // allocation is wrong).
- BBGen[Reg] = ADRPMode ? &MI : nullptr;
- BBKillSet.set(Reg);
- }
- }
- }
-
- // Process register defs.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned CurReg = MO.getReg();
- MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
- if (ItCurRegId == RegToId.end())
- continue;
-
- for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
- MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
- // If this alias has not been recorded, then it is not interesting
- // for the current analysis.
- // We can end up in this situation because of tuple registers.
- // E.g., Let say we are interested in S1. When we register
- // S1, we will also register its aliases and in particular
- // the tuple Q1_Q2.
- // Now, when we encounter Q1_Q2, we will look through its aliases
- // and will find that S2 is not registered.
- if (ItRegId == RegToId.end())
- continue;
-
- BBKillSet.set(ItRegId->second);
- BBGen[ItRegId->second] = &MI;
- }
- BBGen[ItCurRegId->second] = &MI;
- }
- }
-
- // If we restrict our analysis to basic block scope, conservatively add a
- // dummy
- // use for each generated value.
- if (!ADRPMode && DummyOp && !MBB.succ_empty())
- for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
- if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
- }
-}
-
-/// Reaching def core algorithm:
-/// while an Out has changed
-/// for each bb
-/// for each color
-/// In[bb][color] = U Out[bb.predecessors][color]
-/// insert reachableUses[bb][color] in each in[bb][color]
-/// op.reachedUses
-///
-/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- BlockToSetOfInstrsPerColor &In,
- BlockToSetOfInstrsPerColor &Out,
- BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
- BlockToSetOfInstrsPerColor &ReachableUses,
- unsigned NbReg) {
- bool HasChanged;
- do {
- HasChanged = false;
- for (const MachineBasicBlock &MBB : MF) {
- unsigned CurReg;
- for (CurReg = 0; CurReg < NbReg; ++CurReg) {
- SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
- SetOfMachineInstr &BBReachableUses =
- getSet(ReachableUses, MBB, CurReg, NbReg);
- SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
- unsigned Size = BBOutSet.size();
- // In[bb][color] = U Out[bb.predecessors][color]
- for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
- SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
- BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
- }
- // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
- for (const MachineInstr *MI : BBInSet) {
- SetOfMachineInstr &OpReachedUses =
- getUses(ColorOpToReachedUses, CurReg, *MI);
- OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
- }
- // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
- if (!Kill[&MBB].test(CurReg))
- BBOutSet.insert(BBInSet.begin(), BBInSet.end());
- if (Gen[&MBB][CurReg])
- BBOutSet.insert(Gen[&MBB][CurReg]);
- HasChanged |= BBOutSet.size() != Size;
- }
- }
- } while (HasChanged);
-}
-
-/// Reaching definition algorithm.
-/// \param MF function on which the algorithm will operate.
-/// \param[out] ColorOpToReachedUses will contain the result of the reaching
-/// def algorithm.
-/// \param ADRPMode specify whether the reaching def algorithm should be tuned
-/// for ADRP optimization. \see initReachingDef for more details.
-/// \param DummyOp if not NULL, the algorithm will work at
-/// basic block scope and will set for every exposed definition a use to
-/// @p DummyOp.
-/// \pre ColorOpToReachedUses is an array of at least number of registers of
-/// InstrToInstrs.
-static void reachingDef(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- const MapRegToId &RegToId, bool ADRPMode = false,
- const MachineInstr *DummyOp = nullptr) {
- // structures:
- // For each basic block.
- // Out: a set per color of definitions that reach the
- // out boundary of this block.
- // In: Same as Out but for in boundary.
- // Gen: generated color in this block (one operation per color).
- // Kill: register set of killed color in this block.
- // ReachableUses: a set per color of uses (operation) reachable
- // for "In" definitions.
- BlockToSetOfInstrsPerColor Out, In, ReachableUses;
- BlockToInstrPerColor Gen;
- BlockToRegSet Kill;
-
- // Initialize Gen, kill and reachableUses.
- initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
- DummyOp, ADRPMode);
-
- // Algo.
- if (!DummyOp)
- reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
- ReachableUses, RegToId.size());
-}
+} // end anonymous namespace.
-#ifndef NDEBUG
-/// print the result of the reaching definition algorithm.
-static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
- unsigned NbReg, const TargetRegisterInfo *TRI,
- const MapIdToReg &IdToReg) {
- unsigned CurReg;
- for (CurReg = 0; CurReg < NbReg; ++CurReg) {
- if (ColorOpToReachedUses[CurReg].empty())
- continue;
- DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
+INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
+ AARCH64_COLLECT_LOH_NAME, false, false)
- for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
- DEBUG(dbgs() << "Def:\n");
- DEBUG(DefsIt.first->print(dbgs()));
- DEBUG(dbgs() << "Reachable uses:\n");
- for (const MachineInstr *MI : DefsIt.second) {
- DEBUG(MI->print(dbgs()));
- }
- }
+static bool canAddBePartOfLOH(const MachineInstr &MI) {
+ // Check immediate to see if the immediate is an address.
+ switch (MI.getOperand(2).getType()) {
+ default:
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return true;
}
}
-#endif // NDEBUG
/// Answer the following question: Can Def be one of the definitions
/// involved in a part of a LOH?
-static bool canDefBePartOfLOH(const MachineInstr *Def) {
- unsigned Opc = Def->getOpcode();
+static bool canDefBePartOfLOH(const MachineInstr &MI) {
// Accept ADRP, ADDLow and LOADGot.
- switch (Opc) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::ADRP:
return true;
case AArch64::ADDXri:
- // Check immediate to see if the immediate is an address.
- switch (Def->getOperand(2).getType()) {
- default:
- return false;
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- return true;
- }
+ return canAddBePartOfLOH(MI);
case AArch64::LDRXui:
// Check immediate to see if the immediate is an address.
- switch (Def->getOperand(2).getType()) {
+ switch (MI.getOperand(2).getType()) {
default:
return false;
case MachineOperand::MO_GlobalAddress:
- return true;
+ return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
}
}
- // Unreachable.
- return false;
}
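// Editor's sketch (not part of this patch): the three accepted opcodes map
// onto the Mach-O addressing sequences that Linker Optimization Hints
// describe. Assuming Darwin assembly syntax:
//
//   adrp x0, _sym@PAGE              ; AArch64::ADRP
//   add  x0, x0, _sym@PAGEOFF       ; AArch64::ADDXri with an address operand
//   ldr  x0, [x0, _sym@GOTPAGEOFF]  ; AArch64::LDRXui with the MO_GOT flag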
/// Check whether the given instruction can be the end of a LOH chain
/// involving a store.
-static bool isCandidateStore(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::STRBBui:
@@ -543,109 +211,19 @@ static bool isCandidateStore(const MachineInstr *Instr) {
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
+ // We can only optimize the index operand.
 // In case we have str xA, [xA, #imm], these are two different uses
 // of xA and we cannot fold, otherwise the xA stored may be wrong,
 // even if #imm == 0.
- if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
- return true;
- }
- return false;
-}
-
-/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
-/// Build the Use to Defs information and filter out obvious non-LOH candidates.
-/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
-/// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
-/// i.e., no simple chain.
-/// \param ADRPMode -- \see initReachingDef.
-static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
- const InstrToInstrs *ColorOpToReachedUses,
- const MapRegToId &RegToId,
- bool ADRPMode = false) {
-
- SetOfMachineInstr NotCandidate;
- unsigned NbReg = RegToId.size();
- MapRegToId::const_iterator EndIt = RegToId.end();
- for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
- // If this color is never defined, continue.
- if (ColorOpToReachedUses[CurReg].empty())
- continue;
-
- for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
- for (const MachineInstr *MI : DefsIt.second) {
- const MachineInstr *Def = DefsIt.first;
- MapRegToId::const_iterator It;
- // if all the reaching defs are not adrp, this use will not be
- // simplifiable.
- if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
- (!ADRPMode && !canDefBePartOfLOH(Def)) ||
- (!ADRPMode && isCandidateStore(MI) &&
- // store are LOH candidate iff the end of the chain is used as
- // base.
- ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
- It->second != CurReg))) {
- NotCandidate.insert(MI);
- continue;
- }
- // Do not consider self reaching as a simplifiable case for ADRP.
- if (!ADRPMode || MI != DefsIt.first) {
- UseToReachingDefs[MI].insert(DefsIt.first);
- // If UsesIt has several reaching definitions, it is not
- // candidate for simplificaton in non-ADRPMode.
- if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
- NotCandidate.insert(MI);
- }
- }
- }
- }
- for (const MachineInstr *Elem : NotCandidate) {
- DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
- // It would have been better if we could just remove the entry
- // from the map. Because of that, we have to filter the garbage
- // (second.empty) in the subsequence analysis.
- UseToReachingDefs[Elem].clear();
- }
-}
-
-/// Based on the use to defs information (in ADRPMode), compute the
-/// opportunities of LOH ADRP-related.
-static void computeADRP(const InstrToInstrs &UseToDefs,
- AArch64FunctionInfo &AArch64FI,
- const MachineDominatorTree *MDT) {
- DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
- for (const auto &Entry : UseToDefs) {
- unsigned Size = Entry.second.size();
- if (Size == 0)
- continue;
- if (Size == 1) {
- const MachineInstr *L2 = *Entry.second.begin();
- const MachineInstr *L1 = Entry.first;
- if (!MDT->dominates(L2, L1)) {
- DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
- << '\n');
- continue;
- }
- DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
- AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
- ++NumADRPSimpleCandidate;
- }
-#ifndef NDEBUG
- else if (Size == 2)
- ++NumADRPComplexCandidate2;
- else if (Size == 3)
- ++NumADRPComplexCandidate3;
- else
- ++NumADRPComplexCandidateOther;
-#endif
- // if Size < 1, the use should have been removed from the candidates
- assert(Size >= 1 && "No reaching defs for that use!");
+ return MI.getOperandNo(&MO) == 1 &&
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
}
}
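// Editor's note (illustrative, not in the patch): the getOperandNo(&MO) == 1
// check restricts candidates to the base-address operand. A store such as
//
//   str x0, [x0, #0]
//
// uses x0 both as the stored value and as the base; rewriting the base via
// an LOH could change the value being stored, so it is rejected above.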
/// Check whether the given instruction can be the end of a LOH chain
/// involving a load.
-static bool isCandidateLoad(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool isCandidateLoad(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRSBWui:
@@ -660,17 +238,13 @@ static bool isCandidateLoad(const MachineInstr *Instr) {
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
- if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
- return false;
- return true;
+ return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
}
- // Unreachable.
- return false;
}
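// Editor's note (sketch): a GOT-flagged load is the middle of a chain, the
// load of the symbol's address from the GOT, not a terminal data load, so
// it is deliberately excluded here and handled by canDefBePartOfLOH() and
// handleMiddleInst() instead.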
/// Check whether the given instruction can load a literal.
-static bool supportLoadFromLiteral(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool supportLoadFromLiteral(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRSWui:
@@ -681,353 +255,233 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) {
case AArch64::LDRQui:
return true;
}
- // Unreachable.
- return false;
}
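// Editor's note (sketch): byte and half-word loads are absent above because
// AArch64 has no literal-pool ("ldr Rt, label") encoding for them, so an
// AdrpLdr hint, which may let the linker relax the load into a direct
// literal access, cannot apply to them.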
-/// Check whether the given instruction is a LOH candidate.
-/// \param UseToDefs is used to check that Instr is at the end of LOH supported
-/// chain.
-/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
-/// already been filtered out.
-static bool isCandidate(const MachineInstr *Instr,
- const InstrToInstrs &UseToDefs,
- const MachineDominatorTree *MDT) {
- if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
- return false;
+/// Number of GPR registers tracked by mapRegToGPRIndex().
+static const unsigned N_GPR_REGS = 31;
+/// Map register number to index from 0-30.
+static int mapRegToGPRIndex(MCPhysReg Reg) {
+ static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
+ static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
+ if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
+ return Reg - AArch64::X0;
+ if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
+ return Reg - AArch64::W0;
+ // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
+ // handle them as special cases.
+ if (Reg == AArch64::FP)
+ return 29;
+ if (Reg == AArch64::LR)
+ return 30;
+ return -1;
+}
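// Editor's sketch (not part of the patch): a standalone, compilable model of
// the mapping contract above. The enum values, the loh_demo namespace, and
// mapRegToGPRIndexModel are invented for illustration; the real pass uses
// the TableGen'd AArch64::* register numbers.
namespace loh_demo {
enum Reg { X0 = 100, X28 = 128, W0 = 200, W30 = 230, FP = 300, LR = 301 };

constexpr int mapRegToGPRIndexModel(int R) {
  return (X0 <= R && R <= X28) ? R - X0   // X0..X28 -> 0..28
       : (W0 <= R && R <= W30) ? R - W0   // W0..W30 -> 0..30
       : (R == FP)             ? 29       // FP aliases X29/W29
       : (R == LR)             ? 30       // LR aliases X30/W30
                               : -1;      // not a tracked GPR
}

// The X and W views of one GPR share a single LOHInfo slot:
static_assert(mapRegToGPRIndexModel(X0 + 5) == mapRegToGPRIndexModel(W0 + 5),
              "X5 and W5 must map to the same index");
static_assert(mapRegToGPRIndexModel(FP) == 29 && mapRegToGPRIndexModel(LR) == 30,
              "FP/LR get the indices TableGen does not place next to X28");
} // namespace loh_demo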
- const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
- if (Def->getOpcode() != AArch64::ADRP) {
- // At this point, Def is ADDXri or LDRXui of the right type of
- // symbol, because we filtered out the uses that were not defined
- // by these kind of instructions (+ ADRP).
+/// State tracked per register.
+/// The main algorithm walks backwards over a basic block maintaining this
+/// data structure for each tracked general-purpose register.
+struct LOHInfo {
+ MCLOHType Type : 8; ///< "Best" type of LOH possible.
+ bool IsCandidate : 1; ///< Possible LOH candidate.
+ bool OneUser : 1; ///< Found exactly one user (yet).
+ bool MultiUsers : 1; ///< Found multiple users.
+ const MachineInstr *MI0; ///< First instruction involved in the LOH.
+ const MachineInstr *MI1; ///< Second instruction involved in the LOH
+ /// (if any).
+ const MachineInstr *LastADRP; ///< Last ADRP in same register.
+};
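// Editor's note (sketch, not part of the patch): with Type packed into 8
// bits and the three flags into single bits, LOHInfo is roughly one word of
// state plus three pointers on common LP64 targets, so the 31-entry
// per-block array can live on the stack and be reset with a plain memset,
// as runOnMachineFunction does below.
static_assert(sizeof(LOHInfo) <= 4 * sizeof(void *),
              "LOHInfo expected to stay compact");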
- // Check if this forms a simple chain: each intermediate node must
- // dominates the next one.
- if (!MDT->dominates(Def, Instr))
- return false;
- // Move one node up in the simple chain.
- if (UseToDefs.find(Def) ==
- UseToDefs.end()
- // The map may contain garbage we have to ignore.
- ||
- UseToDefs.find(Def)->second.empty())
- return false;
- Instr = Def;
- Def = *UseToDefs.find(Def)->second.begin();
+/// Update state \p Info given \p MI uses the tracked register.
+static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
+ LOHInfo &Info) {
+ // We have multiple uses if we already found one before.
+ if (Info.MultiUsers || Info.OneUser) {
+ Info.IsCandidate = false;
+ Info.MultiUsers = true;
+ return;
}
- // Check if we reached the top of the simple chain:
- // - top is ADRP.
- // - check the simple chain property: each intermediate node must
- // dominates the next one.
- if (Def->getOpcode() == AArch64::ADRP)
- return MDT->dominates(Def, Instr);
- return false;
-}
-
-static bool registerADRCandidate(const MachineInstr &Use,
- const InstrToInstrs &UseToDefs,
- const InstrToInstrs *DefsPerColorToUses,
- AArch64FunctionInfo &AArch64FI,
- SetOfMachineInstr *InvolvedInLOHs,
- const MapRegToId &RegToId) {
- // Look for opportunities to turn ADRP -> ADD or
- // ADRP -> LDR GOTPAGEOFF into ADR.
- // If ADRP has more than one use. Give up.
- if (Use.getOpcode() != AArch64::ADDXri &&
- (Use.getOpcode() != AArch64::LDRXui ||
- !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
- return false;
- InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
- // The map may contain garbage that we need to ignore.
- if (It == UseToDefs.end() || It->second.empty())
- return false;
- const MachineInstr &Def = **It->second.begin();
- if (Def.getOpcode() != AArch64::ADRP)
- return false;
- // Check the number of users of ADRP.
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def.getOperand(0).getReg())->second, Def);
- if (Users->size() > 1) {
- ++NumADRComplexCandidate;
- return false;
+ Info.OneUser = true;
+
+ // Start new LOHInfo if applicable.
+ if (isCandidateLoad(MI)) {
+ Info.Type = MCLOH_AdrpLdr;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ // Note that even if this is AdrpLdr now, we can still switch to an Ldr
+ // variant later.
+ } else if (isCandidateStore(MI, MO)) {
+ Info.Type = MCLOH_AdrpAddStr;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ Info.MI1 = nullptr;
+ } else if (MI.getOpcode() == AArch64::ADDXri) {
+ Info.Type = MCLOH_AdrpAdd;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ } else if (MI.getOpcode() == AArch64::LDRXui &&
+ MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
+ Info.Type = MCLOH_AdrpLdrGot;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
}
- ++NumADRSimpleCandidate;
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
- "ADRP already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
- "ADD already involved in LOH.");
- DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
-
- AArch64FI.addLOHDirective(
- Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
- {&Def, &Use});
- return true;
}
-/// Based on the use to defs information (in non-ADRPMode), compute the
-/// opportunities of LOH non-ADRP-related
-static void computeOthers(const InstrToInstrs &UseToDefs,
- const InstrToInstrs *DefsPerColorToUses,
- AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
- const MachineDominatorTree *MDT) {
- SetOfMachineInstr *InvolvedInLOHs = nullptr;
-#ifndef NDEBUG
- SetOfMachineInstr InvolvedInLOHsStorage;
- InvolvedInLOHs = &InvolvedInLOHsStorage;
-#endif // NDEBUG
- DEBUG(dbgs() << "*** Compute LOH for Others\n");
- // ADRP -> ADD/LDR -> LDR/STR pattern.
- // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
+/// Update state \p Info given the tracked register is clobbered.
+static void handleClobber(LOHInfo &Info) {
+ Info.IsCandidate = false;
+ Info.OneUser = false;
+ Info.MultiUsers = false;
+ Info.LastADRP = nullptr;
+}
- // FIXME: When the statistics are not important,
- // This initial filtering loop can be merged into the next loop.
- // Currently, we didn't do it to have the same code for both DEBUG and
- // NDEBUG builds. Indeed, the iterator of the second loop would need
- // to be changed.
- SetOfMachineInstr PotentialCandidates;
- SetOfMachineInstr PotentialADROpportunities;
- for (auto &Use : UseToDefs) {
- // If no definition is available, this is a non candidate.
- if (Use.second.empty())
- continue;
- // Keep only instructions that are load or store and at the end of
- // a ADRP -> ADD/LDR/Nothing chain.
- // We already filtered out the no-chain cases.
- if (!isCandidate(Use.first, UseToDefs, MDT)) {
- PotentialADROpportunities.insert(Use.first);
- continue;
+/// Update state \p Info given that \p MI is possibly the middle instruction
+/// of an LOH involving 3 instructions.
+static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
+ LOHInfo &OpInfo) {
+ if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
+ return false;
+ // Copy LOHInfo for dest register to LOHInfo for source register.
+ if (&DefInfo != &OpInfo) {
+ OpInfo = DefInfo;
+ // Invalidate \p DefInfo because we track it in \p OpInfo now.
+ handleClobber(DefInfo);
+ } else
+ DefInfo.LastADRP = nullptr;
+
+ // Advance state machine.
+ assert(OpInfo.IsCandidate && "Expect valid state");
+ if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
+ if (OpInfo.Type == MCLOH_AdrpLdr) {
+ OpInfo.Type = MCLOH_AdrpAddLdr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
+ } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+ OpInfo.Type = MCLOH_AdrpAddStr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
}
- PotentialCandidates.insert(Use.first);
- }
-
- // Make the following distinctions for statistics as the linker does
- // know how to decode instructions:
- // - ADD/LDR/Nothing make there different patterns.
- // - LDR/STR make two different patterns.
- // Hence, 6 - 1 base patterns.
- // (because ADRP-> Nothing -> STR is not simplifiable)
-
- // The linker is only able to have a simple semantic, i.e., if pattern A
- // do B.
- // However, we want to see the opportunity we may miss if we were able to
- // catch more complex cases.
-
- // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
- // A potential candidate becomes a candidate, if its current immediate
- // operand is zero and all nodes of the chain have respectively only one user
-#ifndef NDEBUG
- SetOfMachineInstr DefsOfPotentialCandidates;
-#endif
- for (const MachineInstr *Candidate : PotentialCandidates) {
- // Get the definition of the candidate i.e., ADD or LDR.
- const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
- // Record the elements of the chain.
- const MachineInstr *L1 = Def;
- const MachineInstr *L2 = nullptr;
- unsigned ImmediateDefOpc = Def->getOpcode();
- if (Def->getOpcode() != AArch64::ADRP) {
- // Check the number of users of this node.
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, *Def);
- if (Users->size() > 1) {
-#ifndef NDEBUG
- // if all the uses of this def are in potential candidate, this is
- // a complex candidate of level 2.
- bool IsLevel2 = true;
- for (const MachineInstr *MI : *Users) {
- if (!PotentialCandidates.count(MI)) {
- ++NumTooCplxLvl2;
- IsLevel2 = false;
- break;
- }
- }
- if (IsLevel2)
- ++NumCplxLvl2;
-#endif // NDEBUG
- PotentialADROpportunities.insert(Def);
- continue;
- }
- L2 = Def;
- Def = *UseToDefs.find(Def)->second.begin();
- L1 = Def;
- } // else the element in the middle of the chain is nothing, thus
- // Def already contains the first element of the chain.
-
- // Check the number of users of the first node in the chain, i.e., ADRP
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, *Def);
- if (Users->size() > 1) {
-#ifndef NDEBUG
- // if all the uses of this def are in the defs of the potential candidate,
- // this is a complex candidate of level 1
- if (DefsOfPotentialCandidates.empty()) {
- // lazy init
- DefsOfPotentialCandidates = PotentialCandidates;
- for (const MachineInstr *Candidate : PotentialCandidates) {
- if (!UseToDefs.find(Candidate)->second.empty())
- DefsOfPotentialCandidates.insert(
- *UseToDefs.find(Candidate)->second.begin());
- }
- }
- bool Found = false;
- for (auto &Use : *Users) {
- if (!DefsOfPotentialCandidates.count(Use)) {
- ++NumTooCplxLvl1;
- Found = true;
- break;
- }
- }
- if (!Found)
- ++NumCplxLvl1;
-#endif // NDEBUG
- continue;
+ } else {
+ assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
+ assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
+ "Expected GOT relocation");
+ if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+ OpInfo.Type = MCLOH_AdrpLdrGotStr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
+ } else if (OpInfo.Type == MCLOH_AdrpLdr) {
+ OpInfo.Type = MCLOH_AdrpLdrGotLdr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
}
+ }
+ return false;
+}
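// Editor's sketch (not part of the patch): the Type transitions performed
// above, as a standalone model. "IsAdd" stands for an ADDXri middle
// instruction; the alternative is a GOT-flagged LDRXui. The loh_demo names,
// Kind, NoLOH, and advanceModel are invented for illustration.
namespace loh_demo {
enum Kind { AdrpLdr, AdrpAddStr, AdrpAddLdr, AdrpLdrGotLdr, AdrpLdrGotStr,
            NoLOH };

constexpr Kind advanceModel(Kind K, bool IsAdd) {
  return IsAdd ? (K == AdrpLdr    ? AdrpAddLdr
                : K == AdrpAddStr ? AdrpAddStr      // kind kept, MI1 recorded
                                  : NoLOH)
               : (K == AdrpLdr    ? AdrpLdrGotLdr
                : K == AdrpAddStr ? AdrpLdrGotStr
                                  : NoLOH);
}

static_assert(advanceModel(AdrpLdr, true) == AdrpAddLdr, "ADRP; ADD; LDR");
static_assert(advanceModel(AdrpLdr, false) == AdrpLdrGotLdr,
              "ADRP; GOT LDR; LDR");
static_assert(advanceModel(AdrpAddStr, false) == AdrpLdrGotStr,
              "ADRP; GOT LDR; STR");
} // namespace loh_demo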
- bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
- // If the chain is three instructions long and ldr is the second element,
- // then this ldr must load form GOT, otherwise this is not a correct chain.
- if (L2 && !IsL2Add &&
- !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
- continue;
- SmallVector<const MachineInstr *, 3> Args;
- MCLOHType Kind;
- if (isCandidateLoad(Candidate)) {
- if (!L2) {
- // At this point, the candidate LOH indicates that the ldr instruction
- // may use a direct access to the symbol. There is not such encoding
- // for loads of byte and half.
- if (!supportLoadFromLiteral(Candidate))
- continue;
+/// Update state when seeing an ADRP instruction.
+static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
+ LOHInfo &Info) {
+ if (Info.LastADRP != nullptr) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t'
+ << *Info.LastADRP);
+ AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
+ ++NumADRPSimpleCandidate;
+ }
- DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
- << '\n');
- Kind = MCLOH_AdrpLdr;
- Args.push_back(L1);
- Args.push_back(Candidate);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
+ // Produce LOH directive if possible.
+ if (Info.IsCandidate) {
+ switch (Info.Type) {
+ case MCLOH_AdrpAdd:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
+ ++NumADRSimpleCandidate;
+ break;
+ case MCLOH_AdrpLdr:
+ if (supportLoadFromLiteral(*Info.MI0)) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
++NumADRPToLDR;
- } else {
- DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
- << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
- << '\n');
-
- Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
- Args.push_back(L1);
- Args.push_back(L2);
- Args.push_back(Candidate);
-
- PotentialADROpportunities.remove(L2);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
- "L2 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
-#ifndef NDEBUG
- // get the immediate of the load
- if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToLDR;
- else
- ++NumLDRToLDR;
- else if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToLDRWithImm;
- else
- ++NumLDRToLDRWithImm;
-#endif // NDEBUG
}
- } else {
- if (ImmediateDefOpc == AArch64::ADRP)
- continue;
- else {
-
- DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
- << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
- << '\n');
-
- Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
- Args.push_back(L1);
- Args.push_back(L2);
- Args.push_back(Candidate);
-
- PotentialADROpportunities.remove(L2);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
- "L2 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
-#ifndef NDEBUG
- // get the immediate of the store
- if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToSTR;
- else
- ++NumLDRToSTR;
- else if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToSTRWithImm;
- else
- ++NumLDRToSTRWithImm;
-#endif // DEBUG
+ break;
+ case MCLOH_AdrpAddLdr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
+ ++NumADDToLDR;
+ break;
+ case MCLOH_AdrpAddStr:
+ if (Info.MI1 != nullptr) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
+ ++NumADDToSTR;
}
+ break;
+ case MCLOH_AdrpLdrGotLdr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
+ ++NumLDRToLDR;
+ break;
+ case MCLOH_AdrpLdrGotStr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
+ ++NumLDRToSTR;
+ break;
+ case MCLOH_AdrpLdrGot:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
+ break;
+ case MCLOH_AdrpAdrp:
+ llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
}
- AArch64FI.addLOHDirective(Kind, Args);
}
- // Now, we grabbed all the big patterns, check ADR opportunities.
- for (const MachineInstr *Candidate : PotentialADROpportunities)
- registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
- InvolvedInLOHs, RegToId);
+ handleClobber(Info);
+ Info.LastADRP = &MI;
}
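// Illustrative AdrpAdrp case (editor's note, not in the patch): because the
// block is walked backwards, Info.LastADRP is the later ADRP in program
// order. For
//
//   adrp x8, _a@PAGE     ; MI, reached second in the backward walk
//   ...                  ; nothing clobbers x8 in between
//   adrp x8, _b@PAGE     ; Info.LastADRP, reached first
//
// the pass records MCLOH_AdrpAdrp over the pair so the linker may remove
// the later ADRP when _a and _b end up on the same page.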
-/// Look for every register defined by potential LOHs candidates.
-/// Map these registers with dense id in @p RegToId and vice-versa in
-/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
- MapIdToReg &IdToReg,
- const TargetRegisterInfo *TRI) {
- unsigned CurRegId = 0;
- if (!PreCollectRegister) {
- unsigned NbReg = TRI->getNumRegs();
- for (; CurRegId < NbReg; ++CurRegId) {
- RegToId[CurRegId] = CurRegId;
- DEBUG(IdToReg.push_back(CurRegId));
- DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
- }
+static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
+ LOHInfo *LOHInfos) {
+ if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
return;
- }
-
- DEBUG(dbgs() << "** Collect Involved Register\n");
- for (const auto &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- if (!canDefBePartOfLOH(&MI) &&
- !isCandidateLoad(&MI) && !isCandidateStore(&MI))
- continue;
+ int Idx = mapRegToGPRIndex(Reg);
+ if (Idx >= 0)
+ handleClobber(LOHInfos[Idx]);
+}
- // Process defs
- for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
- IOEnd = MI.operands_end();
- IO != IOEnd; ++IO) {
- if (!IO->isReg() || !IO->isDef())
- continue;
- unsigned CurReg = IO->getReg();
- for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
- if (RegToId.find(*AI) == RegToId.end()) {
- DEBUG(IdToReg.push_back(*AI);
- assert(IdToReg[CurRegId] == *AI &&
- "Reg index mismatches insertion index."));
- RegToId[*AI] = CurRegId++;
- DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
- }
- }
+static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
+ // Handle defs and regmasks.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ const uint32_t *RegMask = MO.getRegMask();
+ for (MCPhysReg Reg : AArch64::GPR32RegClass)
+ handleRegMaskClobber(RegMask, Reg, LOHInfos);
+ for (MCPhysReg Reg : AArch64::GPR64RegClass)
+ handleRegMaskClobber(RegMask, Reg, LOHInfos);
+ continue;
}
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ int Idx = mapRegToGPRIndex(MO.getReg());
+ if (Idx < 0)
+ continue;
+ handleClobber(LOHInfos[Idx]);
+ }
+ // Handle uses.
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ int Idx = mapRegToGPRIndex(MO.getReg());
+ if (Idx < 0)
+ continue;
+ handleUse(MI, MO, LOHInfos[Idx]);
}
}
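// Editor's note (sketch, not in the patch): defs are processed before uses
// deliberately. Walking backwards, an instruction such as
//
//   add x0, x0, x1
//
// first clobbers the state accumulated for x0 from later instructions (its
// def), then registers itself as a use, so an LOH chain is never threaded
// through a redefinition of its register.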
@@ -1035,74 +489,59 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
-
- MapRegToId RegToId;
- MapIdToReg IdToReg;
- AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
- assert(AArch64FI && "No MachineFunctionInfo for this function!");
-
- DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
+ DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
+ << "Looking in function " << MF.getName() << '\n');
- collectInvolvedReg(MF, RegToId, IdToReg, TRI);
- if (RegToId.empty())
- return false;
+ LOHInfo LOHInfos[N_GPR_REGS];
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ for (const MachineBasicBlock &MBB : MF) {
+ // Reset register tracking state.
+ memset(LOHInfos, 0, sizeof(LOHInfos));
+ // Live-out registers are used.
+ for (const MachineBasicBlock *Succ : MBB.successors()) {
+ for (const auto &LI : Succ->liveins()) {
+ int RegIdx = mapRegToGPRIndex(LI.PhysReg);
+ if (RegIdx >= 0)
+ LOHInfos[RegIdx].OneUser = true;
+ }
+ }
- MachineInstr *DummyOp = nullptr;
- if (BasicBlockScopeOnly) {
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- // For local analysis, create a dummy operation to record uses that are not
- // local.
- DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
+ // Walk the basic block backwards and update the per-register state machine
+ // in the process.
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::ADDXri:
+ case AArch64::LDRXui:
+ if (canDefBePartOfLOH(MI)) {
+ const MachineOperand &Def = MI.getOperand(0);
+ const MachineOperand &Op = MI.getOperand(1);
+ assert(Def.isReg() && Def.isDef() && "Expected reg def");
+ assert(Op.isReg() && Op.isUse() && "Expected reg use");
+ int DefIdx = mapRegToGPRIndex(Def.getReg());
+ int OpIdx = mapRegToGPRIndex(Op.getReg());
+ if (DefIdx >= 0 && OpIdx >= 0 &&
+ handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
+ continue;
+ }
+ break;
+ case AArch64::ADRP:
+ const MachineOperand &Op0 = MI.getOperand(0);
+ int Idx = mapRegToGPRIndex(Op0.getReg());
+ if (Idx >= 0) {
+ handleADRP(MI, AFI, LOHInfos[Idx]);
+ continue;
+ }
+ break;
+ }
+ handleNormalInst(MI, LOHInfos);
+ }
}
- unsigned NbReg = RegToId.size();
- bool Modified = false;
-
- // Start with ADRP.
- InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
- // Compute the reaching def in ADRP mode, meaning ADRP definitions
- // are first considered as uses.
- reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
- DEBUG(dbgs() << "ADRP reaching defs\n");
- DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
- // Translate the definition to uses map into a use to definitions map to ease
- // statistic computation.
- InstrToInstrs ADRPToReachingDefs;
- reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
-
- // Compute LOH for ADRP.
- computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
- delete[] ColorOpToReachedUses;
-
- // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
- ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
- // first perform a regular reaching def analysis.
- reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
- DEBUG(dbgs() << "All reaching defs\n");
- DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
- // Turn that into a use to defs to ease statistic computation.
- InstrToInstrs UsesToReachingDefs;
- reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
-
- // Compute other than AdrpAdrp LOH.
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
- MDT);
- delete[] ColorOpToReachedUses;
-
- if (BasicBlockScopeOnly)
- MF.DeleteMachineInstr(DummyOp);
-
- return Modified;
+ // Return "no change": The pass only collects information.
+ return false;
}
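// End-to-end illustration (editor's sketch, not part of the patch): for a
// block like
//
//   adrp x8, _g@GOTPAGE
//   ldr  x8, [x8, _g@GOTPAGEOFF]
//   ldr  w0, [x8]
//
// the backward walk sees the terminal load first (AdrpLdr candidate on x8),
// upgrades it through the GOT load (AdrpLdrGotLdr via handleMiddleInst),
// and finally emits the three-instruction MCLOH_AdrpLdrGotLdr directive
// when it reaches the ADRP.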
-/// createAArch64CollectLOHPass - returns an instance of the Statistic for
-/// linker optimization pass.
FunctionPass *llvm::createAArch64CollectLOHPass() {
return new AArch64CollectLOH();
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4c98253878e4..74a01835171b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11,28 +11,79 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64ISelLowering.h"
#include "AArch64PerfectShuffle.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
-#include "AArch64TargetObjectFile.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <bitset>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-lower"
@@ -59,7 +110,6 @@ static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
-
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
@@ -218,7 +268,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@@ -3632,6 +3681,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
llvm_unreachable("Unexpected platform trying to use TLS");
}
+
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -4549,7 +4599,6 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
return DAG.getMergeValues(Ops, dl);
}
-
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
@@ -5074,10 +5123,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int WindowBase;
int WindowScale;
- bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
- WindowScale(1) {}
+ : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
+ ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -7028,7 +7078,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::aarch64_ldaxp:
- case Intrinsic::aarch64_ldxp: {
+ case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
@@ -7038,9 +7088,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
- }
case Intrinsic::aarch64_stlxp:
- case Intrinsic::aarch64_stxp: {
+ case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
@@ -7050,7 +7099,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
- }
default:
break;
}
@@ -8044,13 +8092,13 @@ static SDValue tryCombineToEXTR(SDNode *N,
SDValue LHS;
uint32_t ShiftLHS = 0;
- bool LHSFromHi = 0;
+ bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
- bool RHSFromHi = 0;
+ bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
@@ -9732,52 +9780,51 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
switch(CC) {
case AArch64CC::LE:
- case AArch64CC::GT: {
+ case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
- } break;
+ break;
case AArch64CC::LT:
- case AArch64CC::GE: {
+ case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
- } break;
+ break;
case AArch64CC::HI:
- case AArch64CC::LS: {
+ case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
- } break;
+ break;
case AArch64CC::PL:
- case AArch64CC::MI: {
+ case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
- } break;
+ break;
case AArch64CC::LO:
- case AArch64CC::HS: {
+ case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
- } break;
+ break;
case AArch64CC::EQ:
- case AArch64CC::NE: {
+ case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
-
return true;
- } break;
+ break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
@@ -10501,7 +10548,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
- Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+ Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
@@ -10517,7 +10564,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
- Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldxr, Addr),
@@ -10527,8 +10574,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(
- llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 626c934f236e..5c8acba26aab 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -14,16 +14,37 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include <algorithm>
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -529,19 +550,19 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::NE;
break;
}
@@ -1044,7 +1065,7 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
- return Instr.getOpcode();;
+ return Instr.getOpcode();
case AArch64::ADDWrr: return AArch64::ADDSWrr;
case AArch64::ADDWri: return AArch64::ADDSWri;
@@ -1072,12 +1093,15 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
}
namespace {
+
struct UsedNZCV {
- bool N;
- bool Z;
- bool C;
- bool V;
- UsedNZCV(): N(false), Z(false), C(false), V(false) {}
+ bool N = false;
+ bool Z = false;
+ bool C = false;
+ bool V = false;
+
+ UsedNZCV() = default;
+
UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
@@ -1086,6 +1110,7 @@ struct UsedNZCV {
return *this;
}
};
+
} // end anonymous namespace
/// Find a condition code used by the instruction.
@@ -1561,7 +1586,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
- return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
@@ -1994,7 +2019,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
- llvm::ArrayRef<unsigned> Indices) const {
+ ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2583,7 +2608,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// <rdar://problem/11522048>
//
- if (MI.isCopy()) {
+ if (MI.isFullCopy()) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
@@ -2598,7 +2623,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
- // Handle the case where a copy is being spilled or refilled but the source
+ // Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@@ -2613,7 +2638,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
//
- // will be refilled as
+ // will be filled as
//
// LDRDui %vreg0, fi<#0>
//
@@ -2622,9 +2647,11 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// LDRXui %vregTemp, fi<#0>
// %vreg0 = FMOV %vregTemp
//
- if (MI.isFullCopy() && Ops.size() == 1 &&
+ if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
+ bool IsSpill = Ops[0] == 0;
+ bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
@@ -2632,21 +2659,112 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
const MachineOperand &SrcMO = MI.getOperand(1);
unsigned DstReg = DstMO.getReg();
unsigned SrcReg = SrcMO.getReg();
+ // This is slightly expensive to compute for physical regs since
+ // getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
- const TargetRegisterClass &DstRC = *getRegClass(DstReg);
- const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
- if (DstRC.getSize() == SrcRC.getSize()) {
- if (Ops[0] == 0)
+
+ if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
+ assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+ "Mismatched register size in non subreg COPY");
+ if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
- &SrcRC, &TRI);
+ getRegClass(SrcReg), &TRI);
else
- loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+ getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
+
+ // Handle cases like spilling def of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+ //
+ // where the physical register source can be widened and stored to the full
+ // virtual reg destination stack slot, in this case producing:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ if (IsSpill && DstMO.isUndef() &&
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ assert(SrcMO.getSubReg() == 0 &&
+ "Unexpected subreg on physical register");
+ const TargetRegisterClass *SpillRC;
+ unsigned SpillSubreg;
+ switch (DstMO.getSubReg()) {
+ default:
+ SpillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ case AArch64::ssub:
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::GPR64RegClass;
+ SpillSubreg = AArch64::sub_32;
+ } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR64RegClass;
+ SpillSubreg = AArch64::ssub;
+ } else
+ SpillRC = nullptr;
+ break;
+ case AArch64::dsub:
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR128RegClass;
+ SpillSubreg = AArch64::dsub;
+ } else
+ SpillRC = nullptr;
+ break;
+ }
+
+ if (SpillRC)
+ if (unsigned WidenedSrcReg =
+ TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
+ storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
+ FrameIndex, SpillRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
+ // Handle cases like filling use of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+ //
+  // where we can load the full virtual reg source stack slot into the
+  // subreg destination, in this case producing:
+ //
+ // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+ //
+ if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
+ const TargetRegisterClass *FillRC;
+ switch (DstMO.getSubReg()) {
+ default:
+ FillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ FillRC = &AArch64::GPR32RegClass;
+ break;
+ case AArch64::ssub:
+ FillRC = &AArch64::FPR32RegClass;
+ break;
+ case AArch64::dsub:
+ FillRC = &AArch64::FPR64RegClass;
+ break;
+ }
+
+ if (FillRC) {
+ assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+ "Mismatched regclass size on folded subreg COPY");
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
+ MachineInstr &LoadMI = *--InsertPt;
+ MachineOperand &LoadDst = LoadMI.getOperand(0);
+ assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
+ LoadDst.setSubReg(DstMO.getSubReg());
+ LoadDst.setIsUndef();
+ return &LoadMI;
+ }
+ }
}
// Cannot fold.
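
The fold logic above boils down to two small lookup tables: one that widens a physical source register for spills, and one that picks the register class of the fill load from the destination's subreg index. A minimal self-contained sketch of the fill-side table, using plain enums as stand-ins for the LLVM register classes (illustrative names only, not the real API):

    #include <cstdio>

    // Hypothetical stand-ins for AArch64 subreg indices and register classes.
    enum SubRegIdx { NoSubReg, Sub32, SSub, DSub };
    enum RegClass { NoClass, GPR32, FPR32, FPR64 };

    // Mirrors the fill-class switch above: the COPY destination's subreg
    // index determines the width (register class) of the fill load.
    static RegClass fillClassFor(SubRegIdx DstSub) {
      switch (DstSub) {
      case Sub32: return GPR32;   // 32-bit GPR lane -> LDRWui
      case SSub:  return FPR32;   // 32-bit FPR lane -> LDRSui
      case DSub:  return FPR64;   // 64-bit FPR lane -> LDRDui
      default:    return NoClass; // any other subreg: bail, cannot fold
      }
    }

    int main() {
      std::printf("%s\n", fillClassFor(Sub32) == GPR32 ? "fold" : "bail");
      return 0;
    }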
@@ -2936,7 +3054,7 @@ bool AArch64InstrInfo::useMachineCombiner() const {
return true;
}
-//
+
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
@@ -2955,7 +3073,7 @@ static bool isCombineInstrSettingFlag(unsigned Opc) {
}
return false;
}
-//
+
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
@@ -2974,7 +3092,7 @@ static bool isCombineInstrCandidate32(unsigned Opc) {
}
return false;
}
-//
+
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
@@ -2993,7 +3111,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
}
return false;
}
-//
+
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
@@ -3009,13 +3127,13 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
- TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- return (Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast);
+ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+ return (Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
-//
+
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
@@ -3205,7 +3323,7 @@ static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
- return 0;
+ return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -3971,8 +4089,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
-
- return;
}
/// \brief Replace csincr-branch sequence by simple conditional branch
@@ -4148,6 +4264,7 @@ AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"},
{MO_PAGEOFF, "aarch64-pageoff"},
@@ -4162,6 +4279,7 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 90b2c0896872..5037866925d3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -162,6 +162,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // This tells target-independent code that it is okay to pass instructions
+ // with subreg operands to foldMemoryOperandImpl.
+ bool isSubregFoldable() const override { return true; }
+
using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 20de07424c53..b51473524c72 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1071,8 +1071,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- (CmpInst::Predicate)I.getOperand(1).getPredicate());
+ // CSINC returns its second source incremented by one when the condition
+ // code is false. Therefore, to materialize 1 exactly when the predicate
+ // holds (and 0 otherwise), we select on the inverted predicate.
+ const AArch64CC::CondCode invCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
+ (CmpInst::Predicate)I.getOperand(1).getPredicate()));
MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
.addDef(ZReg)
@@ -1084,7 +1088,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(I.getOperand(0).getReg())
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC);
+ .addImm(invCC);
constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
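
Why the inversion is correct: CSINC Rd, Rn, Rm, cond yields Rn when cond holds and Rm + 1 otherwise, so with both sources wired to WZR and the condition inverted, the result is 1 exactly when the original predicate is true. A tiny host-side model (not LLVM code):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Model of CSINC semantics: result = cond ? n : m + 1.
    static uint32_t csinc(bool cond, uint32_t n, uint32_t m) {
      return cond ? n : m + 1;
    }

    int main() {
      for (bool p : {false, true})
        // Inverting the predicate and zeroing both sources gives 0/1.
        assert(csinc(/*cond=*/!p, /*n=*/0, /*m=*/0) == (p ? 1u : 0u));
      return 0;
    }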
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.h b/lib/Target/AArch64/AArch64InstructionSelector.h
index 0d44e696ac20..2c6e5a912fb7 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.h
+++ b/lib/Target/AArch64/AArch64InstructionSelector.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
+
class AArch64InstrInfo;
class AArch64RegisterBankInfo;
class AArch64RegisterInfo;
@@ -29,7 +30,7 @@ public:
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
- virtual bool select(MachineInstr &I) const override;
+ bool select(MachineInstr &I) const override;
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
@@ -43,5 +44,6 @@ private:
const AArch64RegisterBankInfo &RBI;
};
-} // End llvm namespace.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index ca2860afe13d..f0bffe544158 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -14,17 +14,18 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include <cassert>
namespace llvm {
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
-
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
/// all usable during a tail call.
@@ -34,16 +35,16 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// space to a function with 16-bytes then misalignment of this value would
/// make a stack adjustment necessary, which could not be undone by the
/// callee.
- unsigned BytesInStackArgArea;
+ unsigned BytesInStackArgArea = 0;
/// The number of bytes to restore to deallocate space for incoming
/// arguments. Canonically 0 in the C calling convention, but non-zero when
/// callee is expected to pop the args.
- unsigned ArgumentStackToRestore;
+ unsigned ArgumentStackToRestore = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
- bool HasStackFrame;
+ bool HasStackFrame = false;
/// \brief Size of the stack frame, not including callee-saved registers.
unsigned LocalStackSize;
@@ -53,54 +54,44 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// \brief Number of TLS accesses using the special (combinable)
/// _TLS_MODULE_BASE_ symbol.
- unsigned NumLocalDynamicTLSAccesses;
+ unsigned NumLocalDynamicTLSAccesses = 0;
/// \brief FrameIndex for start of varargs area for arguments passed on the
/// stack.
- int VarArgsStackIndex;
+ int VarArgsStackIndex = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// general purpose registers.
- int VarArgsGPRIndex;
+ int VarArgsGPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in general purpose
/// registers.
- unsigned VarArgsGPRSize;
+ unsigned VarArgsGPRSize = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// floating-point registers.
- int VarArgsFPRIndex;
+ int VarArgsFPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in floating-point
/// registers.
- unsigned VarArgsFPRSize;
+ unsigned VarArgsFPRSize = 0;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
/// True when the stack gets realigned dynamically because the size of stack
/// frame is unknown at compile time. e.g., in case of VLAs.
- bool StackRealigned;
+ bool StackRealigned = false;
/// True when the callee-save stack area has unused gaps that may be used for
/// other stack allocations.
- bool CalleeSaveStackHasFreeSpace;
+ bool CalleeSaveStackHasFreeSpace = false;
public:
- AArch64FunctionInfo()
- : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
- NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
- IsSplitCSR(false), StackRealigned(false),
- CalleeSaveStackHasFreeSpace(false) {}
-
- explicit AArch64FunctionInfo(MachineFunction &MF)
- : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
- NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
- IsSplitCSR(false), StackRealigned(false),
- CalleeSaveStackHasFreeSpace(false) {
+ AArch64FunctionInfo() = default;
+
+ explicit AArch64FunctionInfo(MachineFunction &MF) {
(void)MF;
}
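
The rewrite above is the C++11 in-class default member initializer idiom: each field carries its default at its declaration, so the two constructors no longer repeat a long init list and one of them collapses to '= default'. A minimal sketch with made-up names:

    // Illustrative shape only, not the LLVM class.
    struct FunctionCounts {
      unsigned BytesInArgArea = 0; // default lives with the declaration ...
      bool HasFrame = false;

      FunctionCounts() = default;      // ... so no init list is needed here,
      explicit FunctionCounts(int) {}  // and the defaults apply here too.
    };

    int main() {
      FunctionCounts C; // BytesInArgArea == 0, HasFrame == false
      return C.HasFrame ? 1 : 0;
    }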
@@ -193,6 +184,7 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index f58bbbd26132..03e01329e036 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -71,6 +71,7 @@ void AArch64Subtarget::initializeProperties() {
break;
case Falkor:
MaxInterleaveFactor = 4;
+ VectorInsertExtractBaseCost = 2;
break;
case Kryo:
MaxInterleaveFactor = 4;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index e4ef0d4bb8db..d2883941e2c4 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -15,24 +15,35 @@
#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
+#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
@@ -154,9 +165,9 @@ extern "C" void LLVMInitializeAArch64Target() {
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return make_unique<AArch64_MachoTargetObjectFile>();
+ return llvm::make_unique<AArch64_MachoTargetObjectFile>();
- return make_unique<AArch64_ELFTargetObjectFile>();
+ return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
// Helper function to build a DataLayout string
@@ -202,29 +213,35 @@ AArch64TargetMachine::AArch64TargetMachine(
initAsmInfo();
}
-AArch64TargetMachine::~AArch64TargetMachine() {}
+AArch64TargetMachine::~AArch64TargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct AArch64GISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
const AArch64Subtarget *
@@ -287,6 +304,7 @@ AArch64beTargetMachine::AArch64beTargetMachine(
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
+
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
@@ -324,7 +342,8 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
@@ -414,14 +433,17 @@ bool AArch64PassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
+
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
+
bool AArch64PassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
+
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 88c98865bbc6..1a17691fc584 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -417,14 +417,17 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
}
-int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
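
The new signature lets the cost model ask ScalarEvolution about the actual stride instead of relying on a single IsComplex flag: vector address computation is billed as expensive only when the stride is unknown or larger than what the addressing mode can absorb. A self-contained model of the heuristic (constants taken from the patch; the predicate parameters are stand-ins for isConstantStridedAccessLessThan):

    #include <cstdio>

    static int addressComputationCost(bool IsVectorTy, bool StrideKnown,
                                      long StrideBytes) {
      const int NumVectorInstToHideOverhead = 10;
      const int MaxMergeDistance = 64;
      // Expensive only for vector accesses whose stride is unknown or too
      // large to be folded into the index addressing mode.
      if (IsVectorTy && (!StrideKnown || StrideBytes > MaxMergeDistance))
        return NumVectorInstToHideOverhead;
      return 1; // otherwise assume it merges into the memory instruction
    }

    int main() {
      std::printf("%d\n", addressComputationCost(true, true, 4));  // 1
      std::printf("%d\n", addressComputationCost(true, false, 0)); // 10
      return 0;
    }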
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 24642cb1698e..849fd3d9b44a 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -104,7 +104,7 @@ public:
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- int getAddressComputationCost(Type *Ty, bool IsComplex);
+ int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index db84afacf30e..b86a283b40d4 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -9,45 +9,62 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cctype>
+#include <cstdint>
#include <cstdio>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
namespace {
-class AArch64Operand;
-
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
// Map of register aliases registers via the .req directive.
- StringMap<std::pair<bool, unsigned> > RegisterReqs;
+ StringMap<std::pair<bool, unsigned>> RegisterReqs;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -118,6 +135,7 @@ public:
#include "AArch64GenAsmMatcher.inc"
};
bool IsILP32;
+
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI) {
@@ -143,9 +161,6 @@ public:
MCSymbolRefExpr::VariantKind &DarwinRefKind,
int64_t &Addend);
};
-} // end anonymous namespace
-
-namespace {
/// AArch64Operand - Instances of this class represent a parsed AArch64 machine
/// instruction.
@@ -531,6 +546,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 2);
}
+
bool isImm0_7() const {
if (!isImm())
return false;
@@ -540,6 +556,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 8);
}
+
bool isImm1_8() const {
if (!isImm())
return false;
@@ -549,6 +566,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 9);
}
+
bool isImm0_15() const {
if (!isImm())
return false;
@@ -558,6 +576,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 16);
}
+
bool isImm1_16() const {
if (!isImm())
return false;
@@ -567,6 +586,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 17);
}
+
bool isImm0_31() const {
if (!isImm())
return false;
@@ -576,6 +596,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 32);
}
+
bool isImm1_31() const {
if (!isImm())
return false;
@@ -585,6 +606,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 32);
}
+
bool isImm1_32() const {
if (!isImm())
return false;
@@ -594,6 +616,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 33);
}
+
bool isImm0_63() const {
if (!isImm())
return false;
@@ -603,6 +626,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 64);
}
+
bool isImm1_63() const {
if (!isImm())
return false;
@@ -612,6 +636,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 64);
}
+
bool isImm1_64() const {
if (!isImm())
return false;
@@ -621,6 +646,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 65);
}
+
bool isImm0_127() const {
if (!isImm())
return false;
@@ -630,6 +656,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 128);
}
+
bool isImm0_255() const {
if (!isImm())
return false;
@@ -639,6 +666,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 256);
}
+
bool isImm0_65535() const {
if (!isImm())
return false;
@@ -648,6 +676,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 65536);
}
+
bool isImm32_63() const {
if (!isImm())
return false;
@@ -657,6 +686,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 32 && Val < 64);
}
+
bool isLogicalImm32() const {
if (!isImm())
return false;
@@ -669,6 +699,7 @@ public:
Val &= 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
+
bool isLogicalImm64() const {
if (!isImm())
return false;
@@ -677,6 +708,7 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+
bool isLogicalImm32Not() const {
if (!isImm())
return false;
@@ -686,6 +718,7 @@ public:
int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
+
bool isLogicalImm64Not() const {
if (!isImm())
return false;
@@ -694,7 +727,9 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
}
+
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
+
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
return false;
@@ -737,6 +772,7 @@ public:
// code deal with it.
return true;
}
+
bool isAddSubImmNeg() const {
if (!isShiftedImm() && !isImm())
return false;
@@ -756,7 +792,9 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff;
}
+
bool isCondCode() const { return Kind == k_CondCode; }
+
bool isSIMDImmType10() const {
if (!isImm())
return false;
@@ -765,6 +803,7 @@ public:
return false;
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
+
bool isBranchTarget26() const {
if (!isImm())
return false;
@@ -776,6 +815,7 @@ public:
return false;
return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
}
+
bool isPCRelLabel19() const {
if (!isImm())
return false;
@@ -787,6 +827,7 @@ public:
return false;
return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
}
+
bool isBranchTarget14() const {
if (!isImm())
return false;
@@ -891,40 +932,49 @@ public:
bool isFPImm() const { return Kind == k_FPImm; }
bool isBarrier() const { return Kind == k_Barrier; }
bool isSysReg() const { return Kind == k_SysReg; }
+
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MRSReg != -1U;
}
+
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MSRReg != -1U;
}
+
bool isSystemPStateFieldWithImm0_1() const {
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::UAO);
}
+
bool isSystemPStateFieldWithImm0_15() const {
if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false;
return SysReg.PStateField != -1U;
}
+
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+
bool isVectorRegLo() const {
return Kind == k_Register && Reg.isVector &&
AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
Reg.RegNum);
}
+
bool isGPR32as64() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
}
+
bool isWSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
+
bool isXSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
@@ -957,19 +1007,25 @@ public:
bool isVectorIndex1() const {
return Kind == k_VectorIndex && VectorIndex.Val == 1;
}
+
bool isVectorIndexB() const {
return Kind == k_VectorIndex && VectorIndex.Val < 16;
}
+
bool isVectorIndexH() const {
return Kind == k_VectorIndex && VectorIndex.Val < 8;
}
+
bool isVectorIndexS() const {
return Kind == k_VectorIndex && VectorIndex.Val < 4;
}
+
bool isVectorIndexD() const {
return Kind == k_VectorIndex && VectorIndex.Val < 2;
}
+
bool isToken() const override { return Kind == k_Token; }
+
bool isTokenEqual(StringRef Str) const {
return Kind == k_Token && getToken() == Str;
}
@@ -1006,6 +1062,7 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
}
+
bool isExtendLSL64() const {
if (!isExtend())
return false;
@@ -1836,11 +1893,10 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "<prfop invalid #" << getPrefetch() << ">";
break;
}
- case k_PSBHint: {
+ case k_PSBHint:
OS << getPSBHintName();
break;
- }
- case k_ShiftExtend: {
+ case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
if (!hasShiftExtendAmount())
@@ -1848,7 +1904,6 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << '>';
break;
}
- }
}
/// @name Auto-generated Match Functions
@@ -2469,7 +2524,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
Expr = MCConstantExpr::create(op2, getContext()); \
Operands.push_back( \
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (0)
+ } while (false)
if (Mnemonic == "ic") {
if (!Op.compare_lower("ialluis")) {
@@ -3979,7 +4034,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
-
switch (MatchResult) {
case Match_Success: {
// Perform range checking and other semantic validations
@@ -4550,7 +4604,6 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_InvalidOperand;
}
-
OperandMatchResultTy
AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
@@ -4601,7 +4654,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
+ if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
(isXReg && !XRegClass.contains(SecondReg)) ||
(isWReg && !WRegClass.contains(SecondReg))) {
Error(E,"expected second odd register of a "
@@ -4610,7 +4663,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
}
unsigned Pair = 0;
- if(isXReg) {
+ if (isXReg) {
Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64,
&AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]);
} else {
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 24e353cf4b96..bc2f7f181699 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -17,15 +17,12 @@
namespace llvm {
-class MCInst;
-class raw_ostream;
-
class AArch64Disassembler : public MCDisassembler {
public:
AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- ~AArch64Disassembler() {}
+ ~AArch64Disassembler() override = default;
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
@@ -33,6 +30,6 @@ public:
raw_ostream &CStream) const override;
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a1edb3cef46a..c954c0eb2c6b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -17,25 +17,30 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
+
class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, bool IsILP32);
- ~AArch64ELFObjectWriter() override;
+ ~AArch64ELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
bool IsILP32;
-private:
};
-}
+
+} // end anonymous namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
bool IsLittleEndian,
@@ -44,8 +49,6 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
-AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {}
-
#define R_CLS(rtype) \
IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index f7058cdf2373..62dfa59483eb 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -15,15 +15,23 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -37,13 +45,12 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCInstrInfo &MCII;
- AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: Ctx(ctx), MCII(mcii) {}
-
- ~AArch64MCCodeEmitter() override {}
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete;
+ void operator=(const AArch64MCCodeEmitter &) = delete;
+ ~AArch64MCCodeEmitter() override = default;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
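
The constructor changes above follow the standard C++11 modernization: '= delete' on the copy operations replaces the old private 'DO NOT IMPLEMENT' declarations, turning an accidental copy into a clear compile-time error rather than a link error, and '= default' replaces the empty destructor body. Sketch:

    // Illustrative shape only.
    struct Emitter {
      Emitter() = default;
      Emitter(const Emitter &) = delete;            // was: private, unimplemented
      Emitter &operator=(const Emitter &) = delete; // ditto
      virtual ~Emitter() = default;                 // was: ~Emitter() {}
    };

    int main() {
      Emitter E;
      // Emitter F(E); // error: use of deleted function
      (void)E;
      return 0;
    }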
@@ -181,12 +188,6 @@ private:
} // end anonymous namespace
-MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, Ctx);
-}
-
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned
@@ -601,3 +602,9 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 3e86a42d5be6..1b949b54590c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+
using namespace llvm;
//
@@ -21,7 +22,7 @@ using namespace llvm;
AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
-AArch64TargetStreamer::~AArch64TargetStreamer() {}
+AArch64TargetStreamer::~AArch64TargetStreamer() = default;
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index a8e6902c252b..4acd55eb6120 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -176,12 +176,14 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
MCContext &Context = getObjFileLowering().getContext();
- MCSectionELF *ConfigSection =
- Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(ConfigSection);
+ if (!STM.isAmdHsaOS()) {
+ MCSectionELF *ConfigSection =
+ Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(ConfigSection);
+ }
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 85cbadf0a570..5f651d4da5d2 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -269,7 +269,7 @@ unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
unsigned encodeWaitcnt(IsaVersion Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
- unsigned Waitcnt = getWaitcntBitMask(Version);;
+ unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 10e6297ef1ed..cc001b596785 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index d83228afb0ab..731a5adf3d73 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -104,7 +104,8 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getFPOpCost(Type *Ty);
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 903f92a04431..57ead973b56e 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -8,23 +8,41 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
#include "MCTargetDesc/LanaiMCExpr.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
namespace llvm {
+
+// Auto-generated by TableGen
+static unsigned MatchRegisterName(StringRef Name);
+
namespace {
+
struct LanaiOperand;
class LanaiAsmParser : public MCTargetAsmParser {
@@ -80,9 +98,6 @@ private:
const MCSubtargetInfo &SubtargetInfo;
};
-// Auto-generated by TableGen
-static unsigned MatchRegisterName(llvm::StringRef Name);
-
// LanaiOperand - Instances of this class represent a parsed machine
// instruction
struct LanaiOperand : public MCParsedAsmOperand {
@@ -627,6 +642,8 @@ public:
}
};
+} // end anonymous namespace
+
bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; }
bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
@@ -680,11 +697,11 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseRegister() {
if (Lexer.getKind() == AsmToken::Identifier) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
if (RegNum == 0)
- return 0;
+ return nullptr;
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
- return 0;
+ return nullptr;
}
bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
@@ -701,15 +718,15 @@ bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
SMLoc Start = Parser.getTok().getLoc();
SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- const MCExpr *Res, *RHS = 0;
+ const MCExpr *Res, *RHS = nullptr;
LanaiMCExpr::VariantKind Kind = LanaiMCExpr::VK_Lanai_None;
if (Lexer.getKind() != AsmToken::Identifier)
- return 0;
+ return nullptr;
StringRef Identifier;
if (Parser.parseIdentifier(Identifier))
- return 0;
+ return nullptr;
// Check if identifier has a modifier
if (Identifier.equals_lower("hi"))
@@ -722,24 +739,24 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::LParen) {
Error(Lexer.getLoc(), "Expected '('");
- return 0;
+ return nullptr;
}
Lexer.Lex(); // lex '('
// Parse identifier
if (Parser.parseIdentifier(Identifier))
- return 0;
+ return nullptr;
}
// If addition, parse the RHS.
if (Lexer.getKind() == AsmToken::Plus && Parser.parseExpression(RHS))
- return 0;
+ return nullptr;
// For variants, parse the final ')'
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::RParen) {
Error(Lexer.getLoc(), "Expected ')'");
- return 0;
+ return nullptr;
}
Lexer.Lex(); // lex ')'
}
@@ -771,7 +788,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() {
if (!Parser.parseExpression(ExprVal))
return LanaiOperand::createImm(ExprVal, Start, End);
default:
- return 0;
+ return nullptr;
}
}
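
The 0-to-nullptr conversions in this file are more than style: the literal 0 is simultaneously an int and a null pointer constant, which can silently pick the wrong overload, while nullptr is unambiguous. A small self-contained illustration:

    #include <memory>

    static int f(int)   { return 1; }
    static int f(char*) { return 2; }

    int main() {
      int a = f(0);       // resolves to f(int), surprising if a pointer was meant
      int b = f(nullptr); // unambiguously f(char*)
      std::unique_ptr<int> P = nullptr; // reads as "empty", not "zero"
      return (a == 1 && b == 2 && !P) ? 0 : 1;
    }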
@@ -1204,10 +1221,9 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "LanaiGenAsmMatcher.inc"
-} // namespace
extern "C" void LLVMInitializeLanaiAsmParser() {
RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget());
}
-} // namespace llvm
+} // end namespace llvm
diff --git a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
index a317cd88ad63..e0c19e8ea644 100644
--- a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
+++ b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
@@ -20,14 +20,11 @@
namespace llvm {
-class MCInst;
-class raw_ostream;
-
class LanaiDisassembler : public MCDisassembler {
public:
LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx);
- ~LanaiDisassembler() override {}
+ ~LanaiDisassembler() override = default;
// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
@@ -36,6 +33,6 @@ public:
raw_ostream &CStream) const override;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_DISASSEMBLER_LANAIDISASSEMBLER_H
diff --git a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h b/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
index 1c9d186ad819..59904fbaa318 100644
--- a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
+++ b/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
@@ -14,10 +14,10 @@
#ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
#define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-class MCOperand;
class LanaiInstPrinter : public MCInstPrinter {
public:
@@ -28,14 +28,14 @@ public:
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -60,6 +60,7 @@ private:
bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream,
StringRef Opcode, int AddOffset);
};
-} // namespace llvm
+
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp
index ae7870e07d42..d156294a0b0c 100644
--- a/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -11,31 +11,46 @@
//
//===----------------------------------------------------------------------===//
-#include "LanaiISelLowering.h"
-
#include "Lanai.h"
+#include "LanaiCondCode.h"
+#include "LanaiISelLowering.h"
#include "LanaiMachineFunctionInfo.h"
#include "LanaiSubtarget.h"
-#include "LanaiTargetMachine.h"
#include "LanaiTargetObjectFile.h"
+#include "MCTargetDesc/LanaiBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
#define DEBUG_TYPE "lanai-lower"
@@ -195,6 +210,7 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
llvm_unreachable("unimplemented operand");
}
}
+
//===----------------------------------------------------------------------===//
// Lanai Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -244,7 +260,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = Info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (CallOperandVal == nullptr)
return CW_Default;
// Look at the constraint type.
switch (*Constraint) {
@@ -270,7 +286,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
void LanaiTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result(nullptr, 0);
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
@@ -676,7 +692,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (StackPtr.getNode() == nullptr)
StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
getPointerTy(DAG.getDataLayout()));
@@ -1120,7 +1136,7 @@ const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const {
case LanaiISD::SMALL:
return "LanaiISD::SMALL";
default:
- return NULL;
+ return nullptr;
}
}
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.h b/lib/Target/Lanai/LanaiRegisterInfo.h
index 8b84bbc460e8..c6e459076ebc 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -21,9 +21,6 @@
namespace llvm {
-class TargetInstrInfo;
-class Type;
-
struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
LanaiRegisterInfo();
@@ -32,7 +29,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
// Code Generation virtual methods.
const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -42,7 +39,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
bool canRealignStack(const MachineFunction &MF) const override;
@@ -58,6 +55,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
int getDwarfRegNum(unsigned RegNum, bool IsEH) const;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_LANAIREGISTERINFO_H
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index e30d5e9a18eb..e02bba529bd5 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -9,20 +9,19 @@
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
+
class LanaiELFObjectWriter : public MCELFObjectTargetWriter {
public:
explicit LanaiELFObjectWriter(uint8_t OSABI);
- ~LanaiELFObjectWriter() override;
+ ~LanaiELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -30,14 +29,13 @@ protected:
bool needsRelocateWithSymbol(const MCSymbol &SD,
unsigned Type) const override;
};
-} // namespace
+
+} // end anonymous namespace
LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
/*HasRelocationAddend=*/true) {}
-LanaiELFObjectWriter::~LanaiELFObjectWriter() {}
-
unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
const MCValue & /*Target*/,
const MCFixup &Fixup,
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index ce68b7e24dba..f5b5335bb989 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -12,37 +12,38 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
+#include "LanaiAluCode.h"
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
#include "MCTargetDesc/LanaiMCExpr.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
#define DEBUG_TYPE "mccodeemitter"
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace llvm {
+
namespace {
-class LanaiMCCodeEmitter : public MCCodeEmitter {
- LanaiMCCodeEmitter(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
- const MCInstrInfo &InstrInfo;
- MCContext &Context;
+class LanaiMCCodeEmitter : public MCCodeEmitter {
public:
- LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C)
- : InstrInfo(MCII), Context(C) {}
-
- ~LanaiMCCodeEmitter() override {}
+ LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C) {}
+ LanaiMCCodeEmitter(const LanaiMCCodeEmitter &) = delete;
+ void operator=(const LanaiMCCodeEmitter &) = delete;
+ ~LanaiMCCodeEmitter() override = default;
// The functions below are called by TableGen generated functions for getting
// the binary encoding of instructions/operands.
@@ -86,6 +87,8 @@ public:
const MCSubtargetInfo &STI) const;
};
+} // end anonymous namespace
+
Lanai::Fixups FixupKind(const MCExpr *Expr) {
if (isa<MCSymbolRefExpr>(Expr))
return Lanai::FIXUP_LANAI_21;
@@ -298,8 +301,8 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue(
}
#include "LanaiGenMCCodeEmitter.inc"
-} // namespace
-} // namespace llvm
+
+} // end namespace llvm
llvm::MCCodeEmitter *
llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo,
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index c2f8c0f7ad50..a47ff9ff3d61 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -11,16 +11,21 @@
//
//===----------------------------------------------------------------------===//
+#include "LanaiMCAsmInfo.h"
#include "LanaiMCTargetDesc.h"
-
#include "InstPrinter/LanaiInstPrinter.h"
-#include "LanaiMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cstdint>
+#include <string>
#define GET_INSTRINFO_MC_DESC
#include "LanaiGenInstrInfo.inc"
@@ -70,7 +75,7 @@ static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new LanaiInstPrinter(MAI, MII, MRI);
- return 0;
+ return nullptr;
}
static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
@@ -79,6 +84,7 @@ static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
}
namespace {
+
class LanaiMCInstrAnalysis : public MCInstrAnalysis {
public:
explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info)
@@ -107,6 +113,7 @@ public:
}
}
};
+
} // end anonymous namespace
static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
@@ -131,7 +138,7 @@ extern "C" void LLVMInitializeLanaiTargetMC() {
// Register the MC code emitter
TargetRegistry::RegisterMCCodeEmitter(getTheLanaiTarget(),
- llvm::createLanaiMCCodeEmitter);
+ createLanaiMCCodeEmitter);
// Register the ASM Backend
TargetRegistry::RegisterMCAsmBackend(getTheLanaiTarget(),
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index d3c88482f092..05acd25ae5fc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -47,7 +47,7 @@ namespace llvm {
FCTIDZ, FCTIWZ,
/// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
- /// unsigned integers.
+ /// unsigned integers with round toward zero.
FCTIDUZ, FCTIWUZ,
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
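
To make the amended comment concrete: the Z-suffixed conversions always truncate toward zero, while the plain fctidu/fctiwu forms added below honor the current rounding mode. A host-side C++ illustration of the difference (not PPC codegen):

    #include <cmath>
    #include <cstdio>

    int main() {
      double D = 2.7;
      // fctiduz-style: truncation, independent of the rounding mode.
      unsigned long long Trunc = (unsigned long long)D;
      // fctidu-style: uses the current rounding mode (round-to-nearest by default).
      unsigned long long Rounded = (unsigned long long)std::nearbyint(D);
      std::printf("%llu %llu\n", Trunc, Rounded); // prints: 2 3
      return 0;
    }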
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 03b2257a88a8..fbec8787ef8d 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1154,6 +1154,9 @@ defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
"fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
+defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctidu", "$frD, $frB", IIC_FPGeneral,
+ []>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 99689f656c2d..ef7d2012a233 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -603,6 +603,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_17<opcode, xo, OOL, IOL, asmstr, itin> {
+ let FRA = 0;
+}
+
// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
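The XForm_17a class added above reuses the XForm_17 encoding but hard-wires the FRA field to zero; it exists for the single-source ftsqrt ($crD, $fB) added in PPCInstrInfo.td below, while the two-source ftdiv keeps plain XForm_17.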
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a7231bd2e2c0..90111bbea07d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2172,11 +2172,19 @@ let isCompare = 1, hasSideEffects = 0 in {
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
}
+def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "ftdiv $crD, $fA, $fB", IIC_FPCompare>;
+def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
+ "ftsqrt $crD, $fB", IIC_FPCompare>;
+
let Uses = [RM] in {
let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
+ defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwu", "$frD, $frB", IIC_FPGeneral,
+ []>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fd2189397279..7f72ab17f619 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16985,10 +16985,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
- if (Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC)
if (SDValue NewCond = LowerSETCC(Cond, DAG))
Cond = NewCond;
- }
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
@@ -18289,6 +18288,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
/// constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
@@ -18306,27 +18306,32 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
- if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
- // Let the shuffle legalizer expand this shift amount node.
+ // Need to build a vector containing shift amount.
+ // SSE/AVX packed shifts only use the lower 64 bits of the shift count.
+ // +=================+============+=======================================+
+ // | ShAmt is        | HasSSE4.1? | Construct ShAmt vector as             |
+ // +=================+============+=======================================+
+ // | i64             | Yes, No    | Use ShAmt as lowest elt               |
+ // | i32             | Yes        | zero-extend in-reg                    |
+ // | (i32 zext(i16)) | Yes        | zero-extend in-reg                    |
+ // | i16/i32         | No         | v4i32 build_vector(ShAmt, 0, ud, ud)  |
+ // +=================+============+=======================================+
+
+ if (SVT == MVT::i64)
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
SDValue Op0 = ShAmt.getOperand(0);
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
- ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
+ ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64);
+ } else if (Subtarget.hasSSE41() &&
+ ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
- // Need to build a vector containing shift amount.
- // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- SmallVector<SDValue, 4> ShOps;
- ShOps.push_back(ShAmt);
- if (SVT == MVT::i32) {
- ShOps.push_back(DAG.getConstant(0, dl, SVT));
- ShOps.push_back(DAG.getUNDEF(SVT));
- }
- ShOps.push_back(DAG.getUNDEF(SVT));
-
- MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
- ShAmt = DAG.getBuildVector(BVT, dl, ShOps);
+ SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
+ DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
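As the new comment table documents, SSE/AVX packed shifts with a vector count read only the low 64 bits of the count register, which is why a single scalar in the lowest element suffices. A minimal standalone C++ sketch of that hardware behavior (an illustration added here, not part of the patch):

    #include <emmintrin.h>
    #include <cstdio>

    int main() {
      __m128i V = _mm_set1_epi32(1);
      // Only the low 64 bits of the count register are read by pslld; this is
      // the same shape as the build_vector(ShAmt, 0, undef, undef) form above.
      __m128i Cnt = _mm_set_epi32(0, 0, 0, 3); // low 64 bits hold the count 3
      __m128i R = _mm_sll_epi32(V, Cnt);
      std::printf("%d\n", _mm_cvtsi128_si32(R)); // prints 8, i.e. 1 << 3
      return 0;
    }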
@@ -19014,7 +19019,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
+ Op.getOperand(1), Op.getOperand(2), Subtarget,
+ DAG);
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
@@ -21276,7 +21282,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
- return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG);
+ return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
@@ -25951,12 +25957,11 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- bool FloatDomain = MaskVT.isFloatingPoint() ||
- (!Subtarget.hasAVX2() && MaskVT.is256BitVector());
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
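With this change FloatDomain is computed once in combineX86ShuffleChain (including the AVX1 case, where 256-bit integer shuffles have to run in the float domain) and passed into all four shuffle matchers, instead of each matcher rederiving it slightly differently.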
@@ -26067,11 +26072,11 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
- bool FloatDomain = MaskVT.isFloatingPoint();
bool ContainsZeros = false;
SmallBitVector Zeroable(NumMaskElts, false);
@@ -26211,11 +26216,10 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- SDValue &V1, SDValue &V2,
+ bool FloatDomain, SDValue &V1, SDValue &V2,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
- bool FloatDomain = MaskVT.isFloatingPoint();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
@@ -26310,13 +26314,13 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
SDValue &V1, SDValue &V2,
SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
- bool FloatDomain = MaskVT.isFloatingPoint();
// Attempt to match against PALIGNR byte rotate.
if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
@@ -26594,8 +26598,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26609,8 +26613,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle,
- ShuffleVT, PermuteImm)) {
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
+ Shuffle, ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26626,8 +26630,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle,
- ShuffleVT, UnaryShuffle)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
+ Shuffle, ShuffleVT, UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26643,8 +26647,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget,
- Shuffle, ShuffleVT, PermuteImm)) {
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
+ DAG, Subtarget, Shuffle, ShuffleVT,
+ PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -28742,6 +28747,27 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (EltSize != 32 && EltSize != 64)
+ return false;
+ MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+ // Only change element size, not type.
+ if (VT.isInteger() != OpEltVT.isInteger())
+ return false;
+ uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+ // Op0 needs to be bitcasted to a larger vector with the same element type.
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = MVT::getVectorVT(EltVT,
+ Op0.getSimpleValueType().getSizeInBits() / EltSize);
+ Op0 = DAG.getBitcast(Op0VT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0,
+ DAG.getConstant(Imm, DL, MVT::i8)));
+ return true;
+ }
}
return false;
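A worked instance of the immediate rescaling above (illustrative numbers, not from the patch): folding an EXTRACT_SUBVECTOR at element index 4 of a v8i32 operand (OpEltVT = i32) into a masked op with 64-bit elements gives Imm = (4 * 32) / 64 = 2, which is the same byte offset re-expressed in i64 elements.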
@@ -30921,6 +30947,59 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
+/// Check if truncation with saturation from type \p SrcVT to \p DstVT
+/// is valid for the given \p Subtarget.
+static bool
+isSATValidOnSubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+ EVT SrcElVT = SrcVT.getScalarType();
+ EVT DstElVT = DstVT.getScalarType();
+ if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
+ return false;
+ if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
+ return false;
+ if (SrcVT.is512BitVector() || Subtarget.hasVLX())
+ return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
+ return false;
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value to be truncated, or SDValue() if the pattern was
+/// not matched or is unsupported on the current target.
+static SDValue
+detectUSatPattern(SDValue In, EVT VT, const X86Subtarget &Subtarget) {
+ if (In.getOpcode() != ISD::UMIN)
+ return SDValue();
+
+ EVT InVT = In.getValueType();
+ // FIXME: Scalar type may be supported if we move it to vector register.
+ if (!InVT.isVector() || !InVT.isSimple())
+ return SDValue();
+
+ if (!isSATValidOnSubtarget(InVT, VT, Subtarget))
+ return SDValue();
+
+ // Saturation with truncation. We truncate from InVT to VT.
+ assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
+ "Unexpected types for truncate operation");
+
+ SDValue SrcVal;
+ APInt C;
+ if (ISD::isConstantSplatVector(In.getOperand(0).getNode(), C))
+ SrcVal = In.getOperand(1);
+ else if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C))
+ SrcVal = In.getOperand(0);
+ else
+ return SDValue();
+
+ // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
+ // the element size of the destination type.
+ return (C == ((uint64_t)1 << VT.getScalarSizeInBits()) - 1) ?
+ SrcVal : SDValue();
+}
+
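The identity detectUSatPattern relies on is easy to state in scalar form: clamping with umin at the destination type's unsigned maximum and then truncating is exactly an unsigned saturating truncation, which is what the AVX-512 truncate-with-saturation instructions compute per element. A self-contained C++ sketch (illustration only):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // truncate(umin(x, UINT8_MAX)) == unsigned saturating truncation to u8.
    static uint8_t truncUSat8(uint32_t X) {
      return static_cast<uint8_t>(std::min<uint32_t>(X, 0xFF));
    }

    int main() {
      std::printf("%u %u %u\n", truncUSat8(7), truncUSat8(255), truncUSat8(1000));
      // prints: 7 255 255
      return 0;
    }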
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
/// X86ISD::AVG instruction.
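For reference, the rounding-up average described here is exactly what PAVGB computes lane by lane; a small SSE2 sketch (illustration only, not part of the patch):

    #include <emmintrin.h>
    #include <cstdio>

    int main() {
      __m128i A = _mm_set1_epi8(10), B = _mm_set1_epi8(13);
      __m128i C = _mm_avg_epu8(A, B); // per lane: (10 + 13 + 1) / 2 = 12
      std::printf("%d\n", _mm_extract_epi16(C, 0) & 0xFF); // prints 12
      return 0;
    }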
@@ -31487,6 +31566,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
+ if (SDValue Val =
+ detectUSatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -31967,7 +32052,8 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
static SDValue
-combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
+combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
EVT OutVT = N->getValueType(0);
@@ -31976,8 +32062,10 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
// Shift left by 16 bits, then arithmetic-shift right by 16 bits.
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
for (auto &Reg : Regs) {
- Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG);
- Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt,
+ Subtarget, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt,
+ Subtarget, DAG);
}
for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
@@ -32046,7 +32134,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
- return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+ return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec);
else
return SDValue();
}
@@ -32104,6 +32192,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try the truncation with unsigned saturation.
+ if (SDValue Val = detectUSatPattern(Src, VT, Subtarget))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Val);
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index d7792e296a58..de4839432b9a 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -80,9 +80,12 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
- if (ST->hasAVX512()) return 512;
- if (ST->hasAVX()) return 256;
- if (ST->hasSSE1()) return 128;
+ if (ST->hasAVX512())
+ return 512;
+ if (ST->hasAVX())
+ return 256;
+ if (ST->hasSSE1())
+ return 128;
return 0;
}
@@ -211,11 +214,9 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX512DQ lowering tricks for custom cases.
- if (ST->hasDQI()) {
- if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD,
- LT.second))
+ if (ST->hasDQI())
+ if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX512BWCostTable[] = {
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
@@ -225,37 +226,38 @@ int X86TTIImpl::getArithmeticInstrCost(
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v64i8, 64*20 },
{ ISD::SDIV, MVT::v32i16, 32*20 },
- { ISD::SDIV, MVT::v16i32, 16*20 },
- { ISD::SDIV, MVT::v8i64, 8*20 },
{ ISD::UDIV, MVT::v64i8, 64*20 },
- { ISD::UDIV, MVT::v32i16, 32*20 },
- { ISD::UDIV, MVT::v16i32, 16*20 },
- { ISD::UDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v32i16, 32*20 }
};
// Look for AVX512BW lowering tricks for custom cases.
- if (ST->hasBWI()) {
- if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD,
- LT.second))
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX512CostTable[] = {
- { ISD::SHL, MVT::v16i32, 1 },
- { ISD::SRL, MVT::v16i32, 1 },
- { ISD::SRA, MVT::v16i32, 1 },
- { ISD::SHL, MVT::v8i64, 1 },
- { ISD::SRL, MVT::v8i64, 1 },
- { ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
- { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+
+ { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i32, 1 }, // pmulld
+ { ISD::MUL, MVT::v8i64, 8 }, // 3*pmuludq/3*shift/2*add
+
+ // Vectorizing division is a bad idea. See the SSE2 table for more comments.
+ { ISD::SDIV, MVT::v16i32, 16*20 },
+ { ISD::SDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v16i32, 16*20 },
+ { ISD::UDIV, MVT::v8i64, 8*20 }
};
- if (ST->hasAVX512()) {
+ if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
@@ -315,10 +317,9 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for XOP lowering tricks.
- if (ST->hasXOP()) {
+ if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX2CustomCostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
@@ -334,6 +335,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v8i32, 1 }, // pmulld
+ { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
@@ -344,11 +347,10 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX2 lowering tricks for custom cases.
- if (ST->hasAVX2()) {
+ if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVXCustomCostTable[] = {
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
@@ -372,24 +374,10 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX lowering tricks for custom cases.
- if (ST->hasAVX()) {
+ if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
- }
-
- static const CostTblEntry SSE42FloatCostTable[] = {
- { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
- };
-
- if (ST->hasSSE42()) {
- if (const auto *Entry = CostTableLookup(SSE42FloatCostTable, ISD,
- LT.second))
- return LT.first * Entry->Cost;
- }
static const CostTblEntry
SSE2UniformCostTable[] = {
@@ -452,6 +440,17 @@ int X86TTIImpl::getArithmeticInstrCost(
ISD = ISD::MUL;
}
+ static const CostTblEntry SSE42CostTable[] = {
+ { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
+ };
+
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry SSE41CostTable[] = {
{ ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
@@ -471,44 +470,39 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence.
{ ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend.
+
+ { ISD::MUL, MVT::v4i32, 1 } // pmulld
};
- if (ST->hasSSE41()) {
+ if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend.
{ ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
- { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
+ { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
@@ -531,10 +525,9 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v2i64, 2*20 },
};
- if (ST->hasSSE2()) {
+ if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
@@ -553,307 +546,278 @@ int X86TTIImpl::getArithmeticInstrCost(
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
// Because we believe v4i64 to be a legal type, we must also include the
- // split factor of two in the cost table. Therefore, the cost here is 16
+ // extract+insert in the cost table. Therefore, the cost here is 18
// instead of 8.
- { ISD::MUL, MVT::v4i64, 16 },
+ { ISD::MUL, MVT::v4i64, 18 },
};
// Look for AVX1 lowering tricks.
- if (ST->hasAVX() && !ST->hasAVX2()) {
- MVT VT = LT.second;
-
- if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT))
+ if (ST->hasAVX() && !ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
- // Custom lowering of vectors.
- static const CostTblEntry CustomLowered[] = {
- // A v2i64/v4i64 and multiply is custom lowered as a series of long
- // multiplies(3), shifts(3) and adds(2).
- { ISD::MUL, MVT::v2i64, 8 },
- { ISD::MUL, MVT::v4i64, 8 },
- { ISD::MUL, MVT::v8i64, 8 }
- };
- if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
- return LT.first * Entry->Cost;
-
- // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
- // 2x pmuludq, 2x shuffle.
- if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
- !ST->hasSSE41())
- return LT.first * 6;
-
- static const CostTblEntry SSE1FloatCostTable[] = {
+ static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
};
if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1FloatCostTable, ISD,
- LT.second))
+ if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
+
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
- // 64-bit packed float vectors (v2f32) are widened to type v4f32.
- // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry =
- CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // For broadcasts we are splatting the first element from the first input
+ // register, so we only need to reference that input; all the output
+ // registers are the same.
+ if (Kind == TTI::SK_Broadcast)
+ LT.first = 1;
+
+ // We are going to permute multiple sources, and the result will be in
+ // multiple destinations. We provide an accurate cost only for splits where
+ // the element type remains the same.
+ if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
+ MVT LegalVT = LT.second;
+ if (LegalVT.getVectorElementType().getSizeInBits() ==
+ Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
+
+ unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ // Number of source vectors after legalization:
+ unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
+ // Number of destination vectors after legalization:
+ unsigned NumOfDests = LT.first;
+
+ Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
+ LegalVT.getVectorNumElements());
+
+ unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
+ return NumOfShuffles *
+ getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
+ }
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
- // + 2*pshufb + vinserti64x4
- };
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
- if (ST->hasBWI())
- if (const auto *Entry =
- CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ // For 2-input shuffles, we must account for splitting the 2 inputs into many.
+ if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
+ // We assume that source and destination have the same vector type.
+ int NumOfDests = LT.first;
+ int NumOfShufflesPerDest = LT.first * 2 - 1;
+ LT.first = NumOfDests * NumOfShufflesPerDest;
+ }
- static const CostTblEntry AVX512ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
- };
+ static const CostTblEntry AVX512VBMIShuffleTbl[] = {
+ { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
- static const CostTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
+ };
- { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
- { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
- };
+ if (ST->hasVBMI())
+ if (const auto *Entry =
+ CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512BWShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
+ // + 2*pshufb + vinserti64x4
+
+ { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
+
+ { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
+ };
- static const CostTblEntry AVX1ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
- };
+ if (ST->hasBWI())
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
+
+ { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
+
+ { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
+ { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
+ { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
+ };
- static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
- };
-
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry SSSE3ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+ static const CostTblEntry AVX2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+
+ { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
+ { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
+ };
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
- };
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128
+ { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
+
+ { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+ { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
+ };
- static const CostTblEntry SSE2ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
- };
-
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry SSE1ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
- };
+ static const CostTblEntry SSE41ShuffleTbl[] = {
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
+ { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
+ };
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- } else if (Kind == TTI::SK_PermuteTwoSrc) {
- // We assume that source and destination have the same vector type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int NumOfDests = LT.first;
- int NumOfShufflesPerDest = LT.first * 2 - 1;
- int NumOfShuffles = NumOfDests * NumOfShufflesPerDest;
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermt2b
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1}, // vpermt2b
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // vpermt2b
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3}, // zext + vpermt2w + trunc
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 19}, // 6 * v32i8 + 1
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // zext + vpermt2w + trunc
- };
-
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- static const CostTblEntry AVX512ShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermt2d
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermt2d
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1} // vpermt2d
- };
+ static const CostTblEntry SSSE3ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- } else if (Kind == TTI::SK_PermuteSingleSrc) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- if (LT.first == 1) {
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermb
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1} // vpermb
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 8}, // extend to v32i16
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3} // vpermw + zext/trunc
- };
-
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- static const CostTblEntry AVX512ShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // pshufb
- };
-
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- } else {
- // We are going to permute multiple sources and the result will be in
- // multiple destinations. Providing an accurate cost only for splits where
- // the element type remains the same.
-
- MVT LegalVT = LT.second;
- if (LegalVT.getVectorElementType().getSizeInBits() ==
- Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
- LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
-
- unsigned VecTySize = DL.getTypeStoreSize(Tp);
- unsigned LegalVTSize = LegalVT.getStoreSize();
- // Number of source vectors after legalization:
- unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
- // Number of destination vectors after legalization:
- unsigned NumOfDests = LT.first;
-
- Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
- LegalVT.getVectorNumElements());
-
- unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
- return NumOfShuffles *
- getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
- }
- }
- }
+ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
+ };
+
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
+ { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
+
+ { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
+ };
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
+ };
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
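Worked example for the split accounting above (illustrative): a two-source shuffle of v16i32 on an AVX2 target legalizes to two v8i32 halves per source (LT.first = 2), so NumOfDests = 2, NumOfShufflesPerDest = 2 * 2 - 1 = 3, and the per-register cost is scaled by 2 * 3 = 6 single-register two-source shuffles.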
@@ -1623,17 +1587,29 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
-int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
- if (Ty->isVectorTy() && IsComplex)
- return NumVectorInstToHideOverhead;
+ // The cost of computing a strided access is hidden by the indexing
+ // modes of X86 regardless of the stride value. We don't believe that there
+ // is a difference between constant strided access in general and a constant
+ // stride whose value is less than or equal to 64.
+ // Even in the case of a (loop-invariant) stride whose value is not known at
+ // compile time, the address computation will not incur more than one extra
+ // ADD instruction.
+ if (Ty->isVectorTy() && SE) {
+ if (!BaseT::isStridedAccess(Ptr))
+ return NumVectorInstToHideOverhead;
+ if (!BaseT::getConstantStrideStep(SE, Ptr))
+ return 1;
+ }
- return BaseT::getAddressComputationCost(Ty, IsComplex);
+ return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
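The intuition behind the new early returns: a constant stride folds into x86's base + index * scale + disp addressing mode at no extra cost, and even a loop-invariant stride that is unknown at compile time costs only one ADD to advance the pointer, so the NumVectorInstToHideOverhead penalty is now reserved for genuinely non-strided (gather-like) vector addresses.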
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index f6bcb9f569e4..c013805f4321 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -71,7 +71,8 @@ public:
unsigned AddressSpace);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment);
- int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+ int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF);
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6dd95f8dcd55..6b32f6c31f72 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -36,7 +36,10 @@
using namespace llvm;
-STATISTIC(NumImported, "Number of functions imported");
+STATISTIC(NumImportedFunctions, "Number of functions imported");
+STATISTIC(NumImportedModules, "Number of modules imported from");
+STATISTIC(NumDeadSymbols, "Number of dead-stripped symbols in the index");
+STATISTIC(NumLiveSymbols, "Number of live symbols in index");
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
@@ -69,6 +72,9 @@ static cl::opt<float> ImportColdMultiplier(
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
cl::desc("Print imported functions"));
+static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
+ cl::desc("Compute dead symbols"));
+
// Temporary allows the function import pass to disable always linking
// referenced discardable symbols.
static cl::opt<bool>
@@ -105,78 +111,6 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
namespace {
-// Return true if the Summary describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or renaming
-// is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
- if (!Summary.needsRenaming())
- return true;
-
- if (Summary.noRename())
- // Can't externally reference a global that needs renaming if has a section
- // or is referenced from inline assembly, for example.
- return false;
-
- return true;
-}
-
-// Return true if \p GUID describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or
-// renaming is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index,
- GlobalValue::GUID GUID) {
- auto Summaries = Index.findGlobalValueSummaryList(GUID);
- if (Summaries == Index.end())
- return true;
- if (Summaries->second.size() != 1)
- // If there are multiple globals with this GUID, then we know it is
- // not a local symbol, and it is necessarily externally referenced.
- return true;
-
- // We don't need to check for the module path, because if it can't be
- // externally referenced and we call it, it is necessarilly in the same
- // module
- return canBeExternallyReferenced(**Summaries->second.begin());
-}
-
-// Return true if the global described by \p Summary can be imported in another
-// module.
-static bool eligibleForImport(const ModuleSummaryIndex &Index,
- const GlobalValueSummary &Summary) {
- if (!canBeExternallyReferenced(Summary))
- // Can't import a global that needs renaming if has a section for instance.
- // FIXME: we may be able to import it by copying it without promotion.
- return false;
-
- // Don't import functions that are not viable to inline.
- if (Summary.isNotViableToInline())
- return false;
-
- // Check references (and potential calls) in the same module. If the current
- // value references a global that can't be externally referenced it is not
- // eligible for import. First check the flag set when we have possible
- // opaque references (e.g. inline asm calls), then check the call and
- // reference sets.
- if (Summary.hasInlineAsmMaybeReferencingInternal())
- return false;
- bool AllRefsCanBeExternallyReferenced =
- llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
- return canBeExternallyReferenced(Index, VI.getGUID());
- });
- if (!AllRefsCanBeExternallyReferenced)
- return false;
-
- if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) {
- bool AllCallsCanBeExternallyReferenced = llvm::all_of(
- FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
- return canBeExternallyReferenced(Index, Edge.first.getGUID());
- });
- if (!AllCallsCanBeExternallyReferenced)
- return false;
- }
- return true;
-}
-
/// Given a list of possible callee implementation for a call site, select one
/// that fits the \p Threshold.
///
@@ -214,7 +148,7 @@ selectCallee(const ModuleSummaryIndex &Index,
if (Summary->instCount() > Threshold)
return false;
- if (!eligibleForImport(Index, *Summary))
+ if (Summary->notEligibleToImport())
return false;
return true;
@@ -346,7 +280,8 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -354,6 +289,10 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
+ if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
+ continue;
+ }
auto *Summary = GVSummary.second;
if (auto *AS = dyn_cast<AliasSummary>(Summary))
Summary = &AS->getAliasee();
@@ -393,14 +332,15 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists);
+ &ExportLists, DeadSymbols);
}
// When computing imports we added all GUIDs referenced by anything
@@ -462,6 +402,86 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
+DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
+ const ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ if (!ComputeDead)
+ return DenseSet<GlobalValue::GUID>();
+ if (GUIDPreservedSymbols.empty())
+ // Don't do anything when nothing is live; this keeps the tests simple.
+ return DenseSet<GlobalValue::GUID>();
+ DenseSet<GlobalValue::GUID> LiveSymbols = GUIDPreservedSymbols;
+ SmallVector<GlobalValue::GUID, 128> Worklist;
+ Worklist.reserve(LiveSymbols.size() * 2);
+ for (auto GUID : LiveSymbols) {
+ DEBUG(dbgs() << "Live root: " << GUID << "\n");
+ Worklist.push_back(GUID);
+ }
+ // Add values flagged in the index as live roots to the worklist.
+ for (const auto &Entry : Index) {
+ bool IsLiveRoot = llvm::any_of(
+ Entry.second,
+ [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
+ return Summary->liveRoot();
+ });
+ if (!IsLiveRoot)
+ continue;
+ DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
+ Worklist.push_back(Entry.first);
+ }
+
+ while (!Worklist.empty()) {
+ auto GUID = Worklist.pop_back_val();
+ auto It = Index.findGlobalValueSummaryList(GUID);
+ if (It == Index.end()) {
+ DEBUG(dbgs() << "Not in index: " << GUID << "\n");
+ continue;
+ }
+
+ // FIXME: we should only make the prevailing copy live here
+ for (auto &Summary : It->second) {
+ for (auto Ref : Summary->refs()) {
+ auto RefGUID = Ref.getGUID();
+ if (LiveSymbols.insert(RefGUID).second) {
+ DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n");
+ Worklist.push_back(RefGUID);
+ }
+ }
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+ for (auto Call : FS->calls()) {
+ auto CallGUID = Call.first.getGUID();
+ if (LiveSymbols.insert(CallGUID).second) {
+ DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n");
+ Worklist.push_back(CallGUID);
+ }
+ }
+ }
+ if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+ auto AliaseeGUID = AS->getAliasee().getOriginalName();
+ if (LiveSymbols.insert(AliaseeGUID).second) {
+ DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
+ Worklist.push_back(AliaseeGUID);
+ }
+ }
+ }
+ }
+ DenseSet<GlobalValue::GUID> DeadSymbols;
+ DeadSymbols.reserve(
+ std::min(Index.size(), Index.size() - LiveSymbols.size()));
+ for (auto &Entry : Index) {
+ auto GUID = Entry.first;
+ if (!LiveSymbols.count(GUID)) {
+ DEBUG(dbgs() << "Marking dead: " << GUID << "\n");
+ DeadSymbols.insert(GUID);
+ }
+ }
+ DEBUG(dbgs() << LiveSymbols.size() << " symbols live, and "
+ << DeadSymbols.size() << " symbols dead\n");
+ NumDeadSymbols += DeadSymbols.size();
+ NumLiveSymbols += LiveSymbols.size();
+ return DeadSymbols;
+}
+
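The mark phase above is a plain worklist traversal over the summary graph: seed with the preserved and flagged roots, then chase refs, calls, and aliasees. A minimal standalone sketch of the same idea, with plain integers for GUIDs and an assumed edge map standing in for the summary index (names here are illustrative, not LLVM API):

    #include <cstdint>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    using GUID = uint64_t;

    // Mark phase: everything reachable from Roots through Edges is live;
    // anything else the "index" (Edges) knows about is dead.
    std::unordered_set<GUID>
    deadSymbols(const std::unordered_map<GUID, std::vector<GUID>> &Edges,
                const std::vector<GUID> &Roots) {
      std::unordered_set<GUID> Live(Roots.begin(), Roots.end());
      std::vector<GUID> Worklist(Roots.begin(), Roots.end());
      while (!Worklist.empty()) {
        GUID G = Worklist.back();
        Worklist.pop_back();
        auto It = Edges.find(G);
        if (It == Edges.end())
          continue; // Not in the index; nothing to scan (the DEBUG path above).
        for (GUID Ref : It->second)
          if (Live.insert(Ref).second) // Newly marked: visit its refs too.
            Worklist.push_back(Ref);
      }
      std::unordered_set<GUID> Dead;
      for (const auto &Entry : Edges)
        if (!Live.count(Entry.first))
          Dead.insert(Entry.first);
      return Dead;
    }

    int main() {
      // 1 -> 2 -> 3; 4 is unreferenced. With root 1, only 4 is dead.
      std::unordered_map<GUID, std::vector<GUID>> Edges{
          {1, {2}}, {2, {3}}, {3, {}}, {4, {}}};
      return deadSymbols(Edges, {1}).count(4) == 1 ? 0 : 1;
    }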
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
@@ -625,7 +645,6 @@ Expected<bool> FunctionImporter::importFunctions(
// now, before linking it (otherwise this will be a noop).
if (Error Err = SrcModule->materializeMetadata())
return std::move(Err);
- UpgradeDebugInfo(*SrcModule);
auto &ImportGUIDs = FunctionsToImportPerModule->second;
// Find the globals to import
@@ -698,6 +717,10 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
+ // Upgrade debug info after we're done materializing all the globals and we
+ // have loaded all the required metadata!
+ UpgradeDebugInfo(*SrcModule);
+
// Link in the specified functions.
if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
return true;
@@ -717,9 +740,10 @@ Expected<bool> FunctionImporter::importFunctions(
report_fatal_error("Function Import: link error");
ImportedCount += GlobalsToImport.size();
+ NumImportedModules++;
}
- NumImported += ImportedCount;
+ NumImportedFunctions += ImportedCount;
DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
<< DestModule.getModuleIdentifier() << "\n");
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 2948878cffc4..f4742aaf748f 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -27,9 +27,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
@@ -52,6 +55,20 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
+static cl::opt<std::string> ClSummaryAction(
+ "lowertypetests-summary-action",
+ cl::desc("What to do with the summary when running this pass"), cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+ "lowertypetests-read-summary",
+ cl::desc("Read summary from given YAML file before running pass"),
+ cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+ "lowertypetests-write-summary",
+ cl::desc("Write summary to given YAML file after running pass"),
+ cl::Hidden);
+
bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
if (Offset < ByteOffset)
return false;
@@ -66,38 +83,6 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
return Bits.count(BitOffset);
}
-bool BitSetInfo::containsValue(
- const DataLayout &DL,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
- uint64_t COffset) const {
- if (auto GV = dyn_cast<GlobalObject>(V)) {
- auto I = GlobalLayout.find(GV);
- if (I == GlobalLayout.end())
- return false;
- return containsGlobalOffset(I->second + COffset);
- }
-
- if (auto GEP = dyn_cast<GEPOperator>(V)) {
- APInt APOffset(DL.getPointerSizeInBits(0), 0);
- bool Result = GEP->accumulateConstantOffset(DL, APOffset);
- if (!Result)
- return false;
- COffset += APOffset.getZExtValue();
- return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
- }
-
- if (auto Op = dyn_cast<Operator>(V)) {
- if (Op->getOpcode() == Instruction::BitCast)
- return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
-
- if (Op->getOpcode() == Instruction::Select)
- return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
- containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
- }
-
- return false;
-}
-
void BitSetInfo::print(raw_ostream &OS) const {
OS << "offset " << ByteOffset << " size " << BitSize << " align "
<< (1 << AlignLog2);
@@ -204,7 +189,7 @@ struct ByteArrayInfo {
std::set<uint64_t> Bits;
uint64_t BitSize;
GlobalVariable *ByteArray;
- Constant *Mask;
+ GlobalVariable *MaskGlobal;
};
/// A POD-like structure that we use to store a global reference together with
@@ -241,6 +226,9 @@ public:
class LowerTypeTestsModule {
Module &M;
+ // This is for testing purposes only.
+ std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
+
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
Triple::OSType OS;
@@ -248,6 +236,7 @@ class LowerTypeTestsModule {
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -259,6 +248,37 @@ class LowerTypeTestsModule {
// Mapping from type identifiers to the call sites that test them.
DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
+ /// This structure describes how to lower type tests for a particular type
+ /// identifier. It is either built directly from the global analysis (during
+ /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
+ /// identifier summaries and external symbol references (in ThinLTO backends).
+ struct TypeIdLowering {
+ TypeTestResolution::Kind TheKind;
+
+ /// All except Unsat: the start address within the combined global.
+ Constant *OffsetedGlobal;
+
+ /// ByteArray, Inline, AllOnes: log2 of the required global alignment
+ /// relative to the start address.
+ Constant *AlignLog2;
+
+ /// ByteArray, Inline, AllOnes: size of the memory region covering members
+ /// of this type identifier as a multiple of 2^AlignLog2.
+ Constant *Size;
+
+ /// ByteArray, Inline, AllOnes: range of the size expressed as a bit width.
+ unsigned SizeBitWidth;
+
+ /// ByteArray: the byte array to test the address against.
+ Constant *TheByteArray;
+
+ /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
+ Constant *BitMask;
+
+ /// Inline: the bit mask to test the address against.
+ Constant *InlineBits;
+ };
+
std::vector<ByteArrayInfo> ByteArrayInfos;
Function *WeakInitializerFn = nullptr;
@@ -268,15 +288,13 @@ class LowerTypeTestsModule {
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
- Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
Value *BitOffset);
void lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
- Value *
- lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- Constant *CombinedGlobal,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+ const TypeIdLowering &TIL);
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
ArrayRef<GlobalTypeMember *> Globals);
unsigned getJumpTableEntrySize();
@@ -302,6 +320,7 @@ class LowerTypeTestsModule {
public:
LowerTypeTestsModule(Module &M);
+ ~LowerTypeTestsModule();
bool lower();
};
@@ -380,7 +399,7 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
BAI->Bits = BSI.Bits;
BAI->BitSize = BSI.BitSize;
BAI->ByteArray = ByteArrayGlobal;
- BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ BAI->MaskGlobal = MaskGlobal;
return BAI;
}
@@ -399,8 +418,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
uint8_t Mask;
BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
- BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
- cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ BAI->MaskGlobal->replaceAllUsesWith(
+ ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
+ BAI->MaskGlobal->eraseFromParent();
}
Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
@@ -435,101 +455,121 @@ void LowerTypeTestsModule::allocateByteArrays() {
ByteArraySizeBytes = BAB.Bytes.size();
}
-/// Build a test that bit BitOffset is set in BSI, where
-/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
- ByteArrayInfo *&BAI,
+/// Build a test that bit BitOffset is set in the type identifier that was
+/// lowered to TIL, which must be either an Inline or a ByteArray.
+Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
+ const TypeIdLowering &TIL,
Value *BitOffset) {
- if (BSI.BitSize <= 64) {
+ if (TIL.TheKind == TypeTestResolution::Inline) {
// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.
- IntegerType *BitsTy;
- if (BSI.BitSize <= 32)
- BitsTy = Int32Ty;
- else
- BitsTy = Int64Ty;
-
- uint64_t Bits = 0;
- for (auto Bit : BSI.Bits)
- Bits |= uint64_t(1) << Bit;
- Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
- return createMaskedBitTest(B, BitsConst, BitOffset);
+ return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
} else {
- if (!BAI) {
- ++NumByteArraysCreated;
- BAI = createByteArray(BSI);
- }
-
- Constant *ByteArray = BAI->ByteArray;
- Type *Ty = BAI->ByteArray->getValueType();
+ Constant *ByteArray = TIL.TheByteArray;
if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
- GlobalValue::PrivateLinkage, "bits_use",
- ByteArray, &M);
+ ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
+ "bits_use", ByteArray, &M);
}
- Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
Value *Byte = B.CreateLoad(ByteAddr);
- Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ Value *ByteAndMask =
+ B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
}
}
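For reference, the two non-constant-folded outcomes of createBitSetTest reduce to simple scalar operations; a hedged sketch of what the emitted IR computes (helper names are illustrative, and the inline case assumes a 64-bit set):

    #include <cstdint>

    // Inline kind: the whole bit set fits in a 32/64-bit constant, so
    // membership is one shift and one mask.
    bool inlineTest(uint64_t Bits, uint64_t BitOffset) {
      return (Bits >> (BitOffset & 63)) & 1;
    }

    // ByteArray kind: many type identifiers share one byte array, each
    // owning a single bit position within every byte; Mask selects that bit.
    bool byteArrayTest(const uint8_t *ByteArray, uint64_t BitOffset,
                       uint8_t Mask) {
      return (ByteArray[BitOffset] & Mask) != 0;
    }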
+static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
+ Value *V, uint64_t COffset) {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
+ SmallVector<MDNode *, 2> Types;
+ GV->getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ if (Type->getOperand(1) != TypeId)
+ continue;
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+ if (COffset == Offset)
+ return true;
+ }
+ return false;
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
+ isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
/// Lower a llvm.type.test call to its implementation. Returns the value to
/// replace the call with.
-Value *LowerTypeTestsModule::lowerBitSetCall(
- CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- Constant *CombinedGlobalIntAddr,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+ const TypeIdLowering &TIL) {
+ if (TIL.TheKind == TypeTestResolution::Unsat)
+ return ConstantInt::getFalse(M.getContext());
+
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M.getDataLayout();
-
- if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
return ConstantInt::getTrue(M.getContext());
- Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
- CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
-
BasicBlock *InitialBB = CI->getParent();
IRBuilder<> B(CI);
Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
- if (BSI.isSingleOffset())
+ Constant *OffsetedGlobalAsInt =
+ ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
+ if (TIL.TheKind == TypeTestResolution::Single)
return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
- Value *BitOffset;
- if (BSI.AlignLog2 == 0) {
- BitOffset = PtrOffset;
- } else {
- // We need to check that the offset both falls within our range and is
- // suitably aligned. We can check both properties at the same time by
- // performing a right rotate by log2(alignment) followed by an integer
- // comparison against the bitset size. The rotate will move the lower
- // order bits that need to be zero into the higher order bits of the
- // result, causing the comparison to fail if they are nonzero. The rotate
- // also conveniently gives us a bit offset to use during the load from
- // the bitset.
- Value *OffsetSHR =
- B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
- Value *OffsetSHL = B.CreateShl(
- PtrOffset,
- ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
- BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
- }
-
- Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset, ConstantExpr::getZExt(
+ ConstantExpr::getSub(
+ ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
+ TIL.AlignLog2),
+ IntPtrTy));
+ Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+
+ Constant *BitSizeConst = ConstantExpr::getZExt(TIL.Size, IntPtrTy);
Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
// If the bit set is all ones, testing against it is unnecessary.
- if (BSI.isAllOnes())
+ if (TIL.TheKind == TypeTestResolution::AllOnes)
return OffsetInRange;
TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
@@ -537,7 +577,7 @@ Value *LowerTypeTestsModule::lowerBitSetCall(
// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.
- Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+ Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
// The value we want is 0 if we came directly from the initial block
// (having failed the range or alignment checks), or the loaded bit if
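The combined range-and-alignment check is the subtle part of lowerTypeTestCall; a standalone sketch of the rotate trick, assuming a 64-bit pointer width (names are illustrative):

    #include <cassert>
    #include <cstdint>

    // Right-rotate; the shift-by-zero case is split out to avoid the
    // undefined 64-bit left shift when AlignLog2 == 0.
    uint64_t rotr64(uint64_t V, unsigned S) {
      return S == 0 ? V : (V >> S) | (V << (64 - S));
    }

    // True iff Offset is a multiple of 2^AlignLog2 *and* (Offset >> AlignLog2)
    // is below Size: misaligned low bits rotate into the high bits and make
    // the unsigned comparison fail.
    bool offsetInRange(uint64_t Offset, unsigned AlignLog2, uint64_t Size) {
      return rotr64(Offset, AlignLog2) < Size;
    }

    int main() {
      assert(offsetInRange(16, 3, 4));  // 16 = 2 * 8, bit index 2 < 4.
      assert(!offsetInRange(12, 3, 4)); // Misaligned: low bits rotate high.
      assert(!offsetInRange(64, 3, 4)); // Aligned but out of range: 8 >= 4.
      return 0;
    }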
@@ -622,11 +662,7 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
- Constant *CombinedGlobalIntAddr =
- ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
- DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
- for (auto &P : GlobalLayout)
- GlobalObjLayout[P.first->getGlobal()] = P.second;
+ CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
// For each type identifier in this disjoint set...
for (Metadata *TypeId : TypeIds) {
@@ -640,13 +676,43 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
BSI.print(dbgs());
});
- ByteArrayInfo *BAI = nullptr;
+ TypeIdLowering TIL;
+ TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
+ Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+ TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+ if (BSI.isAllOnes()) {
+ TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
+ : TypeTestResolution::AllOnes;
+ TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+ TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+ BSI.BitSize);
+ } else if (BSI.BitSize <= 64) {
+ TIL.TheKind = TypeTestResolution::Inline;
+ TIL.SizeBitWidth = (BSI.BitSize <= 32) ? 5 : 6;
+ TIL.Size = ConstantInt::get(Int8Ty, BSI.BitSize);
+ uint64_t InlineBits = 0;
+ for (auto Bit : BSI.Bits)
+ InlineBits |= uint64_t(1) << Bit;
+ if (InlineBits == 0)
+ TIL.TheKind = TypeTestResolution::Unsat;
+ else
+ TIL.InlineBits = ConstantInt::get(
+ (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
+ } else {
+ TIL.TheKind = TypeTestResolution::ByteArray;
+ TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+ TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+ BSI.BitSize);
+ ++NumByteArraysCreated;
+ ByteArrayInfo *BAI = createByteArray(BSI);
+ TIL.TheByteArray = BAI->ByteArray;
+ TIL.BitMask = BAI->MaskGlobal;
+ }
// Lower each call to llvm.type.test for this type identifier.
for (CallInst *CI : TypeTestCallSites[TypeId]) {
++NumTypeTestCallsLowered;
- Value *Lowered =
- lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
+ Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();
}
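The if/else chain above derives the resolution kind from two properties of the bit set; restated compactly (the enum and helper are illustrative only):

    #include <cstdint>

    enum class Kind { Unsat, Single, AllOnes, Inline, ByteArray };

    Kind selectKind(uint64_t BitSize, bool AllOnes, uint64_t InlineBits) {
      if (AllOnes)
        return BitSize == 1 ? Kind::Single : Kind::AllOnes;
      if (BitSize <= 64)
        // An empty inline set can never match: fold to constant false.
        return InlineBits == 0 ? Kind::Unsat : Kind::Inline;
      return Kind::ByteArray;
    }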
@@ -1080,6 +1146,22 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
+ // Handle the command-line summary arguments. This code is for testing
+ // purposes only, so we handle errors directly.
+ if (!ClSummaryAction.empty()) {
+ OwnedSummary = make_unique<ModuleSummaryIndex>();
+ if (!ClReadSummary.empty()) {
+ ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
+ ": ");
+ auto ReadSummaryFile =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> *OwnedSummary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
+ }
+
Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
@@ -1087,6 +1169,20 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
ObjectFormat = TargetTriple.getObjectFormat();
}
+LowerTypeTestsModule::~LowerTypeTestsModule() {
+ if (ClSummaryAction.empty() || ClWriteSummary.empty())
+ return;
+
+ ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
+ ": ");
+ std::error_code EC;
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ ExitOnErr(errorCodeToError(EC));
+
+ yaml::Output Out(OS);
+ Out << *OwnedSummary;
+}
+
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f863d192fc2f..b29ed3c87451 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1637,6 +1637,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::cos:
+ case Intrinsic::amdgcn_cos: {
+ Value *SrcSrc;
+ Value *Src = II->getArgOperand(0);
+ if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
+ match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
+ // cos(-x) -> cos(x)
+ // cos(fabs(x)) -> cos(x)
+ II->setArgOperand(0, SrcSrc);
+ return II;
+ }
+
+ break;
+ }
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
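The cos fold is justified purely by cosine being an even function: cos(-x) == cos(x), and fabs changes nothing cos can observe. A quick numeric sanity check of the identity (not LLVM code; exact equality holds on typical libms, which reduce the argument via |x|):

    #include <cassert>
    #include <cmath>

    int main() {
      for (double X : {0.0, 0.5, 1.0, 3.14159, 100.0}) {
        assert(std::cos(-X) == std::cos(X));           // Negation is exact.
        assert(std::cos(std::fabs(X)) == std::cos(X)); // So is fabs.
      }
      return 0;
    }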
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6a7cb0e45c63..1d5528398776 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -514,7 +514,8 @@ struct AddressSanitizer : public FunctionPass {
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
Value *SizeArgument, bool UseCalls, uint32_t Exp);
- void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+ void instrumentUnusualSizeOrAlignment(Instruction *I,
+ Instruction *InsertBefore, Value *Addr,
uint32_t TypeSize, bool IsWrite,
Value *SizeArgument, bool UseCalls,
uint32_t Exp);
@@ -1056,20 +1057,18 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
return nullptr;
*IsWrite = false;
}
- // Only instrument if the mask is constant for now.
- if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
- auto BasePtr = CI->getOperand(0 + OpOffset);
- auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
- *TypeSize = DL.getTypeStoreSizeInBits(Ty);
- if (auto AlignmentConstant =
- dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
- *Alignment = (unsigned)AlignmentConstant->getZExtValue();
- else
- *Alignment = 1; // No alignment guarantees. We probably got Undef
- if (MaybeMask)
- *MaybeMask = CI->getOperand(2 + OpOffset);
- PtrOperand = BasePtr;
- }
+
+ auto BasePtr = CI->getOperand(0 + OpOffset);
+ auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+ *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+ if (auto AlignmentConstant =
+ dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+ *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+ else
+ *Alignment = 1; // No alignment guarantees. We probably got Undef
+ if (MaybeMask)
+ *MaybeMask = CI->getOperand(2 + OpOffset);
+ PtrOperand = BasePtr;
}
}
@@ -1130,24 +1129,25 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
}
static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
- Value *Addr, unsigned Alignment,
- unsigned Granularity, uint32_t TypeSize,
- bool IsWrite, Value *SizeArgument,
- bool UseCalls, uint32_t Exp) {
+ Instruction *InsertBefore, Value *Addr,
+ unsigned Alignment, unsigned Granularity,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
// if the data is properly aligned.
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
TypeSize == 128) &&
(Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
- Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
+ Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+ IsWrite, nullptr, UseCalls, Exp);
}
static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
const DataLayout &DL, Type *IntptrTy,
- ConstantVector *Mask, Instruction *I,
+ Value *Mask, Instruction *I,
Value *Addr, unsigned Alignment,
unsigned Granularity, uint32_t TypeSize,
bool IsWrite, Value *SizeArgument,
@@ -1157,15 +1157,30 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
unsigned Num = VTy->getVectorNumElements();
auto Zero = ConstantInt::get(IntptrTy, 0);
for (unsigned Idx = 0; Idx < Num; ++Idx) {
- // dyn_cast as we might get UndefValue
- auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
- if (Masked && Masked->isAllOnesValue()) {
+ Value *InstrumentedAddress = nullptr;
+ Instruction *InsertBefore = I;
+ if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+ // dyn_cast as we might get UndefValue
+ if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+ if (Masked->isNullValue())
+ // Mask is constant false, so no instrumentation needed.
+ continue;
+ // If we have a true or undef value, fall through to doInstrumentAddress
+ // with InsertBefore == I
+ }
+ } else {
IRBuilder<> IRB(I);
- auto InstrumentedAddress =
- IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
- doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
- ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+ Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+ TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ InsertBefore = ThenTerm;
}
+
+ IRBuilder<> IRB(InsertBefore);
+ InstrumentedAddress =
+ IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+ Granularity, ElemTypeSize, IsWrite, SizeArgument,
+ UseCalls, Exp);
}
}
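Conceptually, the generalized lowering scalarizes a masked access into per-lane checks: constant-false lanes are skipped, constant-true and undef lanes are checked unconditionally, and a non-constant mask gets a per-lane branch. A sketch under those assumptions (checkShadow is a stand-in, not the real ASan callback):

    #include <cstddef>
    #include <optional>
    #include <vector>

    // Stand-in for performing the ASan shadow check on one address.
    void checkShadow(const void *) {}

    // Mask[I] == nullopt models a lane whose mask bit is only known at run
    // time; RuntimeMask supplies that value here, where ASan emits a branch.
    void instrumentMasked(const char *Base, std::size_t ElemSize,
                          const std::vector<std::optional<bool>> &Mask,
                          const std::vector<bool> &RuntimeMask) {
      for (std::size_t I = 0; I < Mask.size(); ++I) {
        if (Mask[I] && !*Mask[I])
          continue;                       // Constant-false lane: skip.
        if (!Mask[I] && !RuntimeMask[I])
          continue;                       // Non-constant: branch on the bit.
        checkShadow(Base + I * ElemSize); // True or undef lane: always check.
      }
    }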
@@ -1220,12 +1235,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
unsigned Granularity = 1 << Mapping.Scale;
if (MaybeMask) {
- auto Mask = cast<ConstantVector>(MaybeMask);
- instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
- Granularity, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr,
+ Alignment, Granularity, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
} else {
- doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+ doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize,
IsWrite, nullptr, UseCalls, Exp);
}
}
@@ -1342,9 +1356,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
// to report the actual access size.
void AddressSanitizer::instrumentUnusualSizeOrAlignment(
- Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls, uint32_t Exp) {
- IRBuilder<> IRB(I);
+ Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(InsertBefore);
Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (UseCalls) {
@@ -1358,8 +1372,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
Value *LastByte = IRB.CreateIntToPtr(
IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
Addr->getType());
- instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
}
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 9485bfd7c296..0137378b828b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1572,6 +1572,13 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Assign value numbers to the new instructions.
for (Instruction *I : NewInsts) {
+ // Instructions that have been inserted in predecessor(s) to materialize
+ // the load address do not retain their original debug locations. Doing
+ // so could lead to confusing (but correct) source attributions.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
+ I->setDebugLoc(DebugLoc());
+
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
@@ -1601,8 +1608,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
- // Transfer DebugLoc.
- NewLoad->setDebugLoc(LI->getDebugLoc());
+ // We do not propagate the old load's debug location, because the new
+ // load now lives in a different BB, and we want to avoid a jumpy line
+ // table.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 1cc5c8f0da84..6ef9d0561322 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -408,6 +408,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurAST->deleteValue(&I);
I.eraseFromParent();
}
+ Changed = true;
continue;
}
@@ -766,6 +767,14 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
+ // Do not retain debug locations when we are moving instructions to different
+ // basic blocks, because we want to avoid jumpy line tables. Calls, however,
+ // need to retain their debug locs because they may be inlined.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
+ if (!isa<CallInst>(I))
+ I.setDebugLoc(DebugLoc());
+
if (isa<LoadInst>(I))
++NumMovedLoads;
else if (isa<CallInst>(I))
@@ -911,14 +920,23 @@ bool llvm::promoteLoopAccessesToScalars(
//
// If at least one store is guaranteed to execute, both properties are
// satisfied, and promotion is legal.
+ //
// This, however, is not a necessary condition. Even if no store/load is
- // guaranteed to execute, we can still establish these properties:
- // (p1) by proving that hoisting the load into the preheader is
- // safe (i.e. proving dereferenceability on all paths through the loop). We
+ // guaranteed to execute, we can still establish these properties.
+ // We can establish (p1) by proving that hoisting the load into the preheader
+ // is safe (i.e. proving dereferenceability on all paths through the loop). We
// can use any access within the alias set to prove dereferenceability,
// since they're all must alias.
- // (p2) by proving the memory is thread-local, so the memory model
+ //
+ // There are two ways to establish (p2):
+ // a) Prove the location is thread-local. In this case the memory model
// requirement does not apply, and stores are safe to insert.
+ // b) Prove a store dominates every exit block. In this case, if an exit
+ // blocks is reached, the original dynamic path would have taken us through
+ // the store, so inserting a store into the exit block is safe. Note that this
+ // is different from the store being guaranteed to execute. For instance,
+ // if an exception is thrown on the first iteration of the loop, the original
+ // store is never executed, but the exit blocks are not executed either.
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
@@ -1000,6 +1018,17 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If a store dominates all exit blocks, it is safe to sink.
+ // As explained above, if an exit block was executed, a dominating
+ // store must have been executed at least once, so we are not
+ // introducing stores on paths that did not have them.
+ // Note that this only looks at explicit exit blocks. If we ever
+ // start sinking stores into unwind edges (see above), this will break.
+ if (!SafeToInsertStore)
+ SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+ return DT->dominates(Store->getParent(), Exit);
+ });
+
// If the store is not guaranteed to execute, we may still get
// deref info through it.
if (!DereferenceableInPH) {
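The new criterion is just a dominance query over the explicit exit blocks; a toy restatement with a precomputed dominance relation in place of DominatorTree (all names illustrative):

    #include <algorithm>
    #include <set>
    #include <utility>
    #include <vector>

    // "Dom" holds pairs (A, B) meaning block A dominates block B; LLVM
    // queries DominatorTree instead of a precomputed set.
    bool storeDominatesAllExits(int StoreBB, const std::vector<int> &Exits,
                                const std::set<std::pair<int, int>> &Dom) {
      return std::all_of(Exits.begin(), Exits.end(), [&](int Exit) {
        return Dom.count({StoreBB, Exit}) != 0;
      });
    }

    int main() {
      // Block 1 holds the store; every path to exits 2 and 3 passes through
      // it, so sinking the store into the exits adds no stores to paths that
      // had none.
      std::set<std::pair<int, int>> Dom{{1, 2}, {1, 3}};
      return storeDominatesAllExits(1, {2, 3}, Dom) ? 0 : 1;
    }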
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fd167db11789..2743574ecca6 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -997,7 +997,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
/// Check if the given conditional branch is based on the comparison between
/// a variable and zero, and if the variable is non-zero, the control yields to
/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparion is returned. This function will be called to see if the
+/// in the comparison is returned. This function will be called to see if the
/// precondition and postcondition of the loop are in desirable form.
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!BI || !BI->isConditional())
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 90309d7ebba6..f64354497771 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -283,8 +283,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// sunk.
for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
Instruction *I = &*II++;
- if (!L.hasLoopInvariantOperands(I) ||
- !canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
continue;
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
Changed = true;
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index 440e36767edf..678d02e05d42 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -56,12 +56,9 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
if (!isPerformingImport() && !isModuleExporting())
return false;
- // If we are exporting, we need to see whether this value is marked
- // as NoRename in the summary. If we are importing, we may not have
- // a summary in the distributed backend case (only summaries for values
- // importes as defs, not references, are included in the index passed
- // to the distributed backends).
if (isPerformingImport()) {
+ assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+ "Attempting to promote non-renamable local");
// We don't know for sure yet if we are importing this value (as either
// a reference or a def), since we are simply walking all values in the
// module. But by necessity if we end up importing it and it is local,
@@ -77,13 +74,28 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
assert(Summaries->second.size() == 1 && "Local has more than one summary");
auto Linkage = Summaries->second.front()->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
- assert(!Summaries->second.front()->noRename());
+ assert(!isNonRenamableLocal(*SGV) &&
+ "Attempting to promote non-renamable local");
return true;
}
return false;
}
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+ const GlobalValue &GV) const {
+ if (!GV.hasLocalLinkage())
+ return false;
+ // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+ if (GV.hasSection())
+ return true;
+ if (Used.count(const_cast<GlobalValue *>(&GV)))
+ return true;
+ return false;
+}
+#endif
+
std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
bool DoPromote) {
// For locals that must be promoted to global scope, ensure that
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8cde0c4cd607..31daba2248aa 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6785,22 +6785,19 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
+/// \brief Gets the address access SCEV after verifying that the access pattern
/// is loop invariant except for the induction variable dependence.
///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
- LoopVectorizationLegality *Legal,
- ScalarEvolution *SE,
- const Loop *TheLoop) {
+/// This SCEV can be sent to the Target in order to estimate the address
+/// calculation cost.
+static const SCEV *getAddressAccessSCEV(
+ Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ ScalarEvolution *SE,
+ const Loop *TheLoop) {
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
- return true;
+ return nullptr;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
@@ -6809,33 +6806,11 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
Value *Opd = Gep->getOperand(i);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
- return true;
+ return nullptr;
}
- // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
- // can likely be merged into the address computation.
- unsigned MaxMergeDistance = 64;
-
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
- if (!AddRec)
- return true;
-
- // Check the step is constant.
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- const auto *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
- return true;
-
- const APInt &APStepVal = C->getAPInt();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return true;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- return StepVal > MaxMergeDistance;
+ // Now we know we have a GEP ptr, %inv, %ind, %inv. Return the Ptr SCEV.
+ return SE->getSCEV(Ptr);
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
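With the heuristic deleted, any stride-based judgement now lives behind getAddressComputationCost, where the target can inspect the SCEV. A hedged sketch of the kind of policy a target might apply, reusing the old 64-byte bound as an example threshold (this is not the actual TTI hook signature):

    #include <cstdint>
    #include <optional>

    // ConstStride: the step extracted from the pointer SCEV when it is a
    // compile-time constant; nullopt models an unknown or non-affine address.
    unsigned addressComputationCost(std::optional<int64_t> ConstStride) {
      constexpr int64_t MaxMergeDistance = 64; // The old heuristic's bound.
      if (ConstStride) {
        int64_t S = *ConstStride < 0 ? -*ConstStride : *ConstStride;
        if (S < MaxMergeDistance)
          return 0; // Small constant stride: likely folded into the
                    // addressing mode.
      }
      return 1;     // Otherwise assume an extra address computation.
    }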
@@ -7063,12 +7038,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned Cost = 0;
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
- // True if the memory instruction's address computation is complex.
- bool IsComplexComputation =
- isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+ // Figure out whether the access is strided and get the stride value
+ // if it's known at compile time.
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
- Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+ Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);