Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp                    |   4
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp                 |  22
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.h                   |   1
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp              |  20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h                |  10
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp                     |  20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h                       |   9
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp                           | 111
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp                  |  67
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp           |  20
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp               |  68
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp                 |  37
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp              |  24
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp                 |  57
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp                            |  68
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h                              |   4
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.cpp                       |   6
-rw-r--r--  lib/CodeGen/MachineOptimizationRemarkEmitter.cpp         |  10
-rw-r--r--  lib/CodeGen/MacroFusion.cpp                              |  27
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp                        | 170
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp                           |   2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp                        |   9
-rw-r--r--  lib/CodeGen/RenameIndependentSubregs.cpp                 |   6
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp                        |   2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp                 | 118
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp        |  21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp |  34
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp         |   2
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp                         |  36
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp                |  11
30 files changed, 747 insertions(+), 249 deletions(-)
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c48fcaa7b0d1d..ff427c9a0d756 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -631,7 +631,9 @@ void AsmPrinter::EmitFunctionHeader() {
const Function *F = MF->getFunction();
if (isVerbose())
- OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n';
+ OutStreamer->GetCommentOS()
+ << "-- Begin function "
+ << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n';
// Print out constants referenced by the function
EmitConstantPool();
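The hunk above makes the verbose "-- Begin function" comment print the unescaped function name. As a minimal sketch, assuming LLVM's convention that a leading '\1' byte in a value name opts it out of target name mangling (the real helper is GlobalValue::dropLLVMManglingEscape):

    #include <string>

    // Sketch of dropLLVMManglingEscape: a '\1' prefix marks a name that
    // must not be mangled; strip it for human-readable asm comments.
    static std::string dropManglingEscape(const std::string &Name) {
      if (!Name.empty() && Name[0] == '\1')
        return Name.substr(1);
      return Name;
    }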
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index e94616fd59006..a81d56e9618bf 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -365,7 +365,7 @@ static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
const MachineFunction *MF) {
// Skip this instruction if it has the same location as the previous one.
- if (DL == CurFn->LastLoc)
+ if (!DL || DL == PrevInstLoc)
return;
const DIScope *Scope = DL.get()->getScope();
@@ -385,11 +385,11 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
if (!CurFn->HaveLineInfo)
CurFn->HaveLineInfo = true;
unsigned FileId = 0;
- if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile())
FileId = CurFn->LastFileId;
else
FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
- CurFn->LastLoc = DL;
+ PrevInstLoc = DL;
unsigned FuncId = CurFn->FuncId;
if (const DILocation *SiteLoc = DL->getInlinedAt()) {
@@ -2150,9 +2150,23 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
if (!Asm || !CurFn || MI->isDebugValue() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
+
+ // If the first instruction of a new MBB has no location, find the first
+ // instruction with a location and use that.
DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || !DL)
+ if (!DL && MI->getParent() != PrevInstBB) {
+ for (const auto &NextMI : *MI->getParent()) {
+ DL = NextMI.getDebugLoc();
+ if (DL)
+ break;
+ }
+ }
+ PrevInstBB = MI->getParent();
+
+ // If we still don't have a debug location, don't record a location.
+ if (!DL)
return;
+
maybeRecordLocation(DL, Asm->MF);
}
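The new beginInstruction() fallback borrows a location from later instructions in the same block when the first instruction has none. A self-contained sketch of that scan, with hypothetical Instr/Loc stand-ins for MachineInstr and DebugLoc:

    #include <vector>

    struct Loc { int Line = 0; explicit operator bool() const { return Line != 0; } };
    struct Instr { Loc DL; };  // hypothetical stand-in for MachineInstr

    // Mirrors the loop above: take the first real location in the block;
    // if none exists, return an empty Loc and the caller records nothing.
    Loc firstLocationInBlock(const std::vector<Instr> &Block) {
      for (const Instr &I : Block)
        if (I.DL)
          return I.DL;
      return Loc();
    }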
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 2cd495aec6dc4..fd8f60425c240 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -118,7 +118,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LocalVariable, 1> Locals;
- DebugLoc LastLoc;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index dc39d1e6cb525..d4a90eeabe155 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -245,17 +245,6 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
CURanges.back().setEnd(Range.getEnd());
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label, const MCSymbol *Sec) {
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Label);
- return addSectionDelta(Die, Attribute, Label, Sec);
-}
-
void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
@@ -380,15 +369,6 @@ void DwarfCompileUnit::constructScopeDIE(
FinalChildren.push_back(std::move(ScopeDIE));
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- new (DIEValueAllocator) DIEDelta(Hi, Lo));
-}
-
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 3c2fb8d99db75..e386727928673 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -127,10 +127,6 @@ public:
void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
- /// addSectionDelta - Add a label delta attribute data and value.
- DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo);
-
DwarfCompileUnit &getCU() override { return *this; }
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
@@ -151,12 +147,6 @@ public:
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
- /// addSectionLabel - Add a Dwarf section label attribute data and value.
- ///
- DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label,
- const MCSymbol *Sec);
-
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 708f5f7536ff1..4f4ebfc562977 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1587,6 +1587,26 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
sizeof(Ty->getOffset()));
}
+DIE::value_iterator
+DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ return Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+DIE::value_iterator
+DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ return addSectionDelta(Die, Attribute, Label, Sec);
+}
+
bool DwarfTypeUnit::isDwoUnit() const {
// Since there are no skeleton type units, all type units are dwo type units
// when split DWARF is being used.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 7acad2cbd89fc..4cc01b3298d47 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -291,6 +291,15 @@ public:
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ /// addSectionDelta - Add a label delta attribute data and value.
+ DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// Add a Dwarf section label attribute data and value.
+ DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec);
+
protected:
~DwarfUnit();
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index cb31c21293f44..b50e76f2e3ba2 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1662,6 +1662,7 @@ class MemCmpExpansion {
PHINode *PhiRes;
bool IsUsedForZeroCmp;
const DataLayout &DL;
+ IRBuilder<> Builder;
unsigned calculateNumBlocks(unsigned Size);
void createLoadCmpBlocks();
@@ -1671,13 +1672,14 @@ class MemCmpExpansion {
void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
unsigned GEPIndex);
Value *getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed, IRBuilder<> &Builder);
+ unsigned &NumBytesProcessed);
void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed);
void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase(unsigned Size);
Value *getMemCmpEqZeroOneBlock(unsigned Size);
+ Value *getMemCmpOneBlock(unsigned Size);
unsigned getLoadSize(unsigned Size);
unsigned getNumLoads(unsigned Size);
@@ -1702,7 +1704,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
unsigned MaxLoadSize, unsigned LoadsPerBlock,
const DataLayout &TheDataLayout)
: CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
- DL(TheDataLayout) {
+ DL(TheDataLayout), Builder(CI) {
// A memcmp with zero-comparison with only one block of load and compare does
// not need to set up any extra blocks. This case could be handled in the DAG,
@@ -1710,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
// we choose to handle this case too to avoid fragmented lowering.
IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
NumBlocks = calculateNumBlocks(Size);
- if (!IsUsedForZeroCmp || NumBlocks != 1) {
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
@@ -1731,7 +1733,6 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
}
- IRBuilder<> Builder(CI->getContext());
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
}
@@ -1754,8 +1755,6 @@ void MemCmpExpansion::createResultBlock() {
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
unsigned GEPIndex) {
- IRBuilder<> Builder(CI->getContext());
-
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1811,8 +1810,7 @@ unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed,
- IRBuilder<> &Builder) {
+ unsigned &NumBytesProcessed) {
std::vector<Value *> XorList, OrList;
Value *Diff;
@@ -1910,8 +1908,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
- IRBuilder<> Builder(CI->getContext());
- Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder);
+ Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
@@ -1946,8 +1943,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
return;
}
- IRBuilder<> Builder(CI->getContext());
-
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(LoadSize <= MaxLoadSize && "Unexpected load type");
@@ -1975,9 +1970,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
if (DL.isLittleEndian()) {
- Function *F = LoadCmpBlocks[Index]->getParent();
-
- Function *Bswap = Intrinsic::getDeclaration(F->getParent(),
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
@@ -1995,16 +1988,13 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
}
- Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
-
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
- ConstantInt::get(Diff->getType(), 0));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
: LoadCmpBlocks[Index + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.
- BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
@@ -2020,8 +2010,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {
- IRBuilder<> Builder(CI->getContext());
-
// Special case: if memcmp result is used in a zero equality, result does not
// need to be calculated and can simply return 1.
if (IsUsedForZeroCmp) {
@@ -2070,7 +2058,6 @@ unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
}
void MemCmpExpansion::setupResultBlockPHINodes() {
- IRBuilder<> Builder(CI->getContext());
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
Builder.SetInsertPoint(ResBlock.BB);
ResBlock.PhiSrc1 =
@@ -2080,8 +2067,6 @@ void MemCmpExpansion::setupResultBlockPHINodes() {
}
void MemCmpExpansion::setupEndBlockPHINodes() {
- IRBuilder<> Builder(CI->getContext());
-
Builder.SetInsertPoint(&EndBlock->front());
PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
}
@@ -2102,11 +2087,45 @@ Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
/// in the general case.
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
unsigned NumBytesProcessed = 0;
- IRBuilder<> Builder(CI->getContext());
- Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder);
+ Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
+ assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian() && Size != 1) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ // TODO: Instead of comparing ULT, just subtract and return the difference?
+ Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Type *I32 = Builder.getInt32Ty();
+ Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
+ ConstantInt::get(I32, 1));
+ return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+}
+
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
@@ -2114,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
getMemCmpExpansionZeroCase(Size);
+ // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+ if (NumBlocks == 1 && NumLoadsPerBlock == 1)
+ return getMemCmpOneBlock(Size);
+
// This loop calls emitLoadCompareBlock for comparing Size bytes of the two
// memcmp sources. It starts with loading using the maximum load size set by
// the target. It processes any remaining bytes using a load size which is the
@@ -2218,7 +2241,6 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL) {
NumMemCmpCalls++;
- IRBuilder<> Builder(CI->getContext());
// TTI call to check if target would like to expand memcmp. Also, get the
// MaxLoadSize.
@@ -4378,14 +4400,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If the real base value actually came from an inttoptr, then the matcher
// will look through it and provide only the integer value. In that case,
// use it here.
- if (!ResultPtr && AddrMode.BaseReg) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
- AddrMode.BaseReg = nullptr;
- } else if (!ResultPtr && AddrMode.Scale == 1) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
- AddrMode.Scale = 0;
+ if (!DL->isNonIntegralPointerType(Addr->getType())) {
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.Scale = 0;
+ }
}
if (!ResultPtr &&
@@ -4466,6 +4490,19 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
+ // We'd require a ptrtoint/inttoptr down the line, which we can't do for
+ // non-integral pointers, so in that case bail out now.
+ Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
+ Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
+ PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
+ PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
+ if (DL->isNonIntegralPointerType(Addr->getType()) ||
+ (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
+ (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
+ (AddrMode.BaseGV &&
+ DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
+ return false;
+
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
@@ -6367,7 +6404,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
// Update PHI nodes in both successors. The original BB needs to be
- // replaced in one succesor's PHI nodes, because the branch comes now from
+ // replaced in one successor's PHI nodes, because the branch comes now from
// the newly generated BB (NewBB). In the other successor we need to add one
// incoming edge to the PHI nodes, because both branch instructions target
// now the same successor. Depending on the original branch condition
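getMemCmpOneBlock() above replaces the general compare/branch/phi chain with straight-line code: two wide loads, byte swaps on little-endian targets so that an unsigned integer compare matches lexicographic byte order, and two selects that fold the answer into -1/0/1. A C++ sketch of what the emitted IR computes for a 4-byte memcmp (memcmp4 and __builtin_bswap32 are illustrative stand-ins, not part of the patch):

    #include <cstdint>
    #include <cstring>

    int memcmp4(const void *P1, const void *P2) {
      uint32_t A, B;
      std::memcpy(&A, P1, 4);            // the two wide loads
      std::memcpy(&B, P2, 4);
      A = __builtin_bswap32(A);          // bswap so the unsigned compare
      B = __builtin_bswap32(B);          // follows memory byte order
      int Sel1 = (A < B) ? -1 : 1;       // CreateSelect(CmpULT, -1, 1)
      return (A != B) ? Sel1 : 0;        // CreateSelect(CmpNE, Sel1, 0)
    }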
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 239bad2f53557..521037f9d206b 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,34 +11,69 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "irtranslator"
using namespace llvm;
char IRTranslator::ID = 0;
+
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
@@ -62,7 +97,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
@@ -71,7 +106,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
@@ -686,6 +720,26 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
+ case Intrinsic::exp:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::exp2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
case Intrinsic::fma:
MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
@@ -834,7 +888,6 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->front()))
return false;
-
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
MCSymbol *BeginSymbol = Context.createTempSymbol();
@@ -1195,7 +1248,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
TPC = &getAnalysis<TargetPassConfig>();
- ORE = make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5466efd7e90f4..860fc9a4f8b61 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,19 +11,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
#define DEBUG_TYPE "instructionselector"
using namespace llvm;
-InstructionSelector::InstructionSelector() {}
+InstructionSelector::InstructionSelector() = default;
bool InstructionSelector::constrainOperandRegToRegClass(
MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
@@ -33,8 +36,8 @@ bool InstructionSelector::constrainOperandRegToRegClass(
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return llvm::constrainRegToClass(MRI, TII, RBI, I,
- I.getOperand(OpIdx).getReg(), RC);
+ return
+ constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
}
bool InstructionSelector::constrainSelectedInstRegOperands(
@@ -84,7 +87,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
-
if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
return *VRegVal == Value;
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1d0d3dffa4c59..84b0a0ac41579 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -158,7 +158,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0)
+ if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
return UnableToLegalize;
MIRBuilder.setInstr(MI);
@@ -166,6 +166,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
+ NarrowTy.getSizeInBits();
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(Dst);
+ DstRegs.push_back(Dst);
+ }
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD: {
// Expand in terms of carry-setting/consuming G_ADDE instructions.
int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
@@ -193,6 +207,58 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_EXTRACT: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(0).getReg();
+ int64_t OpStart = MI.getOperand(2).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+      // Compute where the overlap with the extracted range begins inside
+      // this narrow part, and how many bits of the part are covered.
+ int64_t ExtractOffset, SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ unsigned SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_INSERT: {
if (TypeIdx != 0)
return UnableToLegalize;
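The G_EXTRACT case above narrows by splitting the wide source into NarrowTy-sized parts and, for each part that overlaps the extracted range, computing where the overlap starts inside the part (ExtractOffset) and how wide it is (SegSize). A sketch of that interval arithmetic with a worked example:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    struct Segment { int64_t Offset, Size; };

    // Overlap of the extract [OpStart, OpStart+OpSize) with one narrow
    // part [SrcStart, SrcStart+NarrowSize); mirrors the branch above.
    Segment overlap(int64_t OpStart, int64_t OpSize,
                    int64_t SrcStart, int64_t NarrowSize) {
      assert(SrcStart < OpStart + OpSize && SrcStart + NarrowSize > OpStart);
      if (OpStart < SrcStart)
        return {0, std::min(NarrowSize, OpStart + OpSize - SrcStart)};
      return {OpStart - SrcStart,
              std::min(SrcStart + NarrowSize - OpStart, OpSize)};
    }

    // Example: extracting s16 at bit offset 24 from an s64 narrowed into
    // four s16 parts touches part 1 (offset 8, size 8) and part 2
    // (offset 0, size 8); the two 8-bit pieces are then re-merged.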
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 595802f2228b9..76917aa9660d4 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,4 +1,4 @@
-//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==//
+//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,16 +18,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOpcodes.h"
+#include <algorithm>
+#include <cassert>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
-LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+LegalizerInfo::LegalizerInfo() {
+ DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar;
+
// FIXME: these two can be legalized to the fundamental load/store Jakob
// proposed. Once loads & stores are supported.
DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
@@ -42,6 +51,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar;
DefaultActions[TargetOpcode::G_FNEG] = Lower;
}
@@ -75,8 +85,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_EXTRACT ||
- Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
+ if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
@@ -172,21 +181,21 @@ Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
case Custom:
return Aspect.Type;
case NarrowScalar: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
}
case WidenScalar: {
- return findLegalType(Aspect, [](LLT Ty) -> LLT {
+ return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT {
return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
});
}
case FewerElements: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.halfElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); });
}
case MoreElements: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
}
}
}
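The renamed findLegalizableSize helpers search for a workable type by applying the per-action mutation until something legal turns up. A sketch of the NarrowScalar direction of that search, with IsLegal as an assumed stand-in for the action-table query (the real search also handles vector element counts):

    #include <functional>
    #include <optional>

    // Halve the scalar width until the predicate accepts it; give up
    // when the width bottoms out.
    std::optional<unsigned>
    findNarrowedSize(unsigned Bits, const std::function<bool(unsigned)> &IsLegal) {
      for (unsigned W = Bits; W >= 1; W /= 2)
        if (IsLegal(W))
          return W;
      return std::nullopt;
    }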
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3c70013ea296b..47c6214c05528 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -264,10 +264,13 @@ MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
}
MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+ assert(MRI->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
+ assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() ||
+ MRI->getType(Res) == MRI->getType(Op));
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
@@ -364,27 +367,36 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_SEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_ZEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-
MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
LLT SrcTy = MRI->getType(Src);
LLT DstTy = MRI->getType(Dst);
@@ -466,7 +478,7 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
}
MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
- return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res);
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
}
MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
@@ -482,6 +494,9 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
"input operands do not cover output register");
#endif
+ if (Ops.size() == 1)
+ return buildCast(Res, Ops[0]);
+
MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
MIB.addDef(Res);
for (unsigned i = 0; i < Ops.size(); ++i)
@@ -511,8 +526,11 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
unsigned Op, unsigned Index) {
+ assert(Index + MRI->getType(Op).getSizeInBits() <=
+ MRI->getType(Res).getSizeInBits() &&
+ "insertion past the end of a register");
+
if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
- assert(Index == 0 && "insertion past the end of a register");
return buildCast(Res, Op);
}
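buildSExtOrTrunc/buildZExtOrTrunc above pick an opcode purely from the size relation between Res and Op, with the new assertions pinning down the equal-size case. The decision table, as a sketch:

    // Dst > Src  -> extend (G_SEXT / G_ZEXT)
    // Dst < Src  -> G_TRUNC
    // Dst == Src -> plain COPY (the types must then match exactly)
    enum class ExtOp { Copy, Ext, Trunc };

    ExtOp pickExtOrTrunc(unsigned DstBits, unsigned SrcBits) {
      if (DstBits > SrcBits) return ExtOp::Ext;
      if (DstBits < SrcBits) return ExtOp::Trunc;
      return ExtOp::Copy;
    }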
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 2eb3cdee694d4..677941dbbf6da 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==//
+//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,39 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
#define DEBUG_TYPE "regbankselect"
@@ -37,6 +58,7 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
"Use the Greedy mode (best local mapping)")));
char RegBankSelect::ID = 0;
+
INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
@@ -48,8 +70,7 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
false)
RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
- MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+ : MachineFunctionPass(ID), OptMode(RunningMode) {
initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
@@ -72,7 +93,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
- MORE = make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -133,9 +154,11 @@ bool RegBankSelect::repairReg(
TargetRegisterInfo::isPhysicalRegister(Dst)) &&
"We are about to create several defs for Dst");
- // Build the instruction used to repair, then clone it at the right places.
- MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src);
- MI->removeFromParent();
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MachineInstr *MI =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
<< '\n');
// TODO:
@@ -202,11 +225,11 @@ uint64_t RegBankSelect::getRepairCost(
RBI->copyCost(*DesiredRegBrank, *CurRegBank,
RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
- if (Cost != UINT_MAX)
+ if (Cost != std::numeric_limits<unsigned>::max())
return Cost;
// Return the legalization cost of that repairing.
}
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
}
const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
@@ -352,7 +375,7 @@ void RegBankSelect::tryAvoidingSplit(
// the repairing cost because of the PHIs already proceeded
// as already stated.
// Though the code will be correct.
- assert(0 && "Repairing cost may not be accurate");
+ assert(false && "Repairing cost may not be accurate");
} else {
// We need to do non-local repairing. Basically, patch all
// the uses (i.e., phis) that we already proceeded.
@@ -450,7 +473,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
uint64_t RepairCost = getRepairCost(MO, ValMapping);
// This is an impossible to repair cost.
- if (RepairCost == UINT_MAX)
+ if (RepairCost == std::numeric_limits<unsigned>::max())
continue;
// Bias used for splitting: 5%.
@@ -535,9 +558,11 @@ bool RegBankSelect::applyMapping(
llvm_unreachable("Other kind should not happen");
}
}
+
// Second, rewrite the instruction.
DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
+
return true;
}
@@ -638,11 +663,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
RepairingPlacement::RepairingKind Kind)
// Default is, we are going to insert code to repair OpIdx.
- : Kind(Kind),
- OpIdx(OpIdx),
- CanMaterialize(Kind != RepairingKind::Impossible),
- HasSplit(false),
- P(P) {
+ : Kind(Kind), OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
const MachineOperand &MO = MI.getOperand(OpIdx);
assert(MO.isReg() && "Trying to repair a non-reg operand");
@@ -847,7 +869,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
}
RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
- : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+ : LocalFreq(LocalFreq.getFrequency()) {}
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
// Check if this overflows.
@@ -920,7 +942,6 @@ bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
OtherLocalAdjust = Cost.LocalCost - LocalCost;
else
ThisLocalAdjust = LocalCost - Cost.LocalCost;
-
} else {
ThisLocalAdjust = LocalCost;
OtherLocalAdjust = Cost.LocalCost;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 398066bf8903e..8c43c9f3f8846 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,14 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
void LiveRangeCalc::resetLiveOutMap() {
unsigned NumBlocks = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(NumBlocks);
- EntryInfoMap.clear();
+ EntryInfos.clear();
Map.resize(NumBlocks);
}
@@ -283,8 +286,11 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
// Determine if the exit from the block is reached by some def.
unsigned N = WorkList[i];
MachineBasicBlock &B = *MF->getBlockNumbered(N);
- if (Seen[N] && Map[&B].first != nullptr)
- return MarkDefined(B);
+ if (Seen[N]) {
+ const LiveOutPair &LOB = Map[&B];
+ if (LOB.first != nullptr && LOB.first != &UndefVNI)
+ return MarkDefined(B);
+ }
SlotIndex Begin, End;
std::tie(Begin, End) = Indexes->getMBBRange(&B);
// Treat End as not belonging to B.
@@ -365,10 +371,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#endif
FoundUndef |= MBB->pred_empty();
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
-
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
// Is this a known live-out block?
if (Seen.test(Pred->getNumber())) {
if (VNInfo *VNI = Map[Pred].first) {
@@ -387,7 +390,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
auto EP = LR.extendInBlock(Undefs, Start, End);
VNInfo *VNI = EP.first;
FoundUndef |= EP.second;
- setLiveOutValue(Pred, VNI);
+ setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
@@ -406,7 +409,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
LiveIn.clear();
- FoundUndef |= (TheVNI == nullptr);
+ FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
if (Undefs.size() > 0 && FoundUndef)
UniqueVNI = false;
@@ -417,7 +420,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
- assert(TheVNI != nullptr);
+ assert(TheVNI != nullptr && TheVNI != &UndefVNI);
LiveRangeUpdater Updater(&LR);
for (unsigned BN : WorkList) {
SlotIndex Start, End;
@@ -433,22 +436,26 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
// Prepare the defined/undefined bit vectors.
- auto EF = EntryInfoMap.find(&LR);
- if (EF == EntryInfoMap.end()) {
+ EntryInfoMap::iterator Entry;
+ bool DidInsert;
+ std::tie(Entry, DidInsert) = EntryInfos.insert(
+ std::make_pair(&LR, std::make_pair(BitVector(), BitVector())));
+ if (DidInsert) {
+ // Initialize newly inserted entries.
unsigned N = MF->getNumBlockIDs();
- EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first;
- EF->second.first.resize(N);
- EF->second.second.resize(N);
+ Entry->second.first.resize(N);
+ Entry->second.second.resize(N);
}
- BitVector &DefOnEntry = EF->second.first;
- BitVector &UndefOnEntry = EF->second.second;
+ BitVector &DefOnEntry = Entry->second.first;
+ BitVector &UndefOnEntry = Entry->second.second;
// Multiple values were found, so transfer the work list to the LiveIn array
// where UpdateSSA will use it as a work list.
LiveIn.reserve(WorkList.size());
for (unsigned BN : WorkList) {
MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
- if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ if (Undefs.size() > 0 &&
+ !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
if (MBB == &UseMBB)
@@ -466,9 +473,9 @@ void LiveRangeCalc::updateSSA() {
assert(DomTree && "Missing dominator tree");
// Iterate until convergence.
- unsigned Changes;
+ bool Changed;
do {
- Changes = 0;
+ Changed = false;
// Propagate live-out values down the dominator tree, inserting phi-defs
// when necessary.
for (LiveInBlock &I : LiveIn) {
@@ -491,15 +498,20 @@ void LiveRangeCalc::updateSSA() {
IDomValue = Map[IDom->getBlock()];
// Cache the DomTree node that defined the value.
- if (IDomValue.first && !IDomValue.second)
+ if (IDomValue.first && IDomValue.first != &UndefVNI &&
+ !IDomValue.second) {
Map[IDom->getBlock()].second = IDomValue.second =
DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+ }
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- LiveOutPair &Value = Map[*PI];
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveOutPair &Value = Map[Pred];
if (!Value.first || Value.first == IDomValue.first)
continue;
+ if (Value.first == &UndefVNI) {
+ needPHI = true;
+ break;
+ }
// Cache the DomTree node that defined the value.
if (!Value.second)
@@ -523,7 +535,7 @@ void LiveRangeCalc::updateSSA() {
// Create a phi-def if required.
if (needPHI) {
- ++Changes;
+ Changed = true;
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(MBB);
@@ -542,7 +554,7 @@ void LiveRangeCalc::updateSSA() {
LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
- } else if (IDomValue.first) {
+ } else if (IDomValue.first && IDomValue.first != &UndefVNI) {
// No phi-def here. Remember incoming value.
I.Value = IDomValue.first;
@@ -554,9 +566,9 @@ void LiveRangeCalc::updateSSA() {
// MBB is live-out and doesn't define its own value.
if (LOP.first == IDomValue.first)
continue;
- ++Changes;
+ Changed = true;
LOP = IDomValue;
}
}
- } while (Changes);
+ } while (Changed);
}
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 1a7598f8044a5..d41b782d9bdf2 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -24,6 +24,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -65,7 +66,8 @@ class LiveRangeCalc {
/// registers do not overlap), but the defined/undefined information must
/// be kept separate for each individual range.
/// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
- std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ EntryInfoMap EntryInfos;
/// Map each basic block where a live range is live out to the live-out value
/// and its defining block.
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index f58d1f8b83aeb..c58d192284dd0 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -579,12 +579,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
//
// is equivalent to
// liveins: %edi, %esi
- bool ExplicitSuccesors = false;
+ bool ExplicitSuccessors = false;
while (true) {
if (Token.is(MIToken::kw_successors)) {
if (parseBasicBlockSuccessors(MBB))
return true;
- ExplicitSuccesors = true;
+ ExplicitSuccessors = true;
} else if (Token.is(MIToken::kw_liveins)) {
if (parseBasicBlockLiveins(MBB))
return true;
@@ -636,7 +636,7 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
}
// Construct successor list by searching for basic block machine operands.
- if (!ExplicitSuccesors) {
+ if (!ExplicitSuccessors) {
SmallVector<MachineBasicBlock*,4> Successors;
bool IsFallthrough;
guessSuccessors(MBB, Successors, IsFallthrough);
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 6b6b5f2814a90..73c3428a6e535 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -52,6 +52,14 @@ void MachineOptimizationRemarkEmitter::emit(
computeHotness(OptDiag);
LLVMContext &Ctx = MF.getFunction()->getContext();
+
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) {
+ return;
+ }
+
yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
if (Out) {
auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
@@ -73,7 +81,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
MachineFunction &MF) {
MachineBlockFrequencyInfo *MBFI;
- if (MF.getFunction()->getContext().getDiagnosticHotnessRequested())
+ if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested())
MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
else
MBFI = nullptr;
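The emit() change filters remarks by hotness before they reach the YAML output: a remark that carries a hotness value is dropped when it falls below the context's threshold, while remarks without hotness always pass. A minimal sketch, using std::optional in place of LLVM's Optional:

    #include <cstdint>
    #include <optional>

    // No hotness attached: always emit. Otherwise require it to meet the
    // threshold, matching the early return added above.
    bool shouldEmit(std::optional<uint64_t> Hotness, uint64_t Threshold) {
      return !Hotness || *Hotness >= Threshold;
    }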
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 45ea0e4c39ab4..5e279b065bbda 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -1,4 +1,4 @@
-//===- MacroFusion.cpp - Macro Fusion ----------------------===//
+//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#define DEBUG_TYPE "misched"
@@ -26,8 +33,6 @@ using namespace llvm;
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-namespace {
-
static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Create a single weak edge between the adjacent instrs. The only effect is
@@ -66,6 +71,7 @@ static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
++NumFused;
}
+namespace {
/// \brief Post-process the DAG to create cluster edges between instrs that may
/// be fused by the processor into a single operation.
@@ -81,6 +87,8 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
+} // end anonymous namespace
+
void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
@@ -128,23 +136,18 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
return false;
}
-} // end anonymous namespace
-
-
-namespace llvm {
-
std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+llvm::createMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
return nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
-createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+llvm::createBranchMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
return nullptr;
}
-
-} // end namespace llvm
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index da8fac6d3834a..b13f6b68c420f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
+// Limit the length of recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+ "recurrence-chain-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+ "of commuting operands"));
+
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
class ValueTrackerResult;
+ class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *MLI;
public:
static char ID; // Pass identification
@@ -150,6 +161,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
if (Aggressive) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -160,6 +173,9 @@ namespace {
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
+ /// \brief Sequence of instructions that form a recurrence cycle.
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -170,6 +186,7 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
@@ -178,6 +195,13 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ /// \brief Finds recurrence cycles, but only ones formulated around
+ /// a def operand and a use operand that are tied. If there is a use
+ /// operand commutable with the tied use operand, find the recurrence
+ /// cycle along that operand as well.
+ bool findTargetRecurrence(unsigned Reg,
+ const SmallSet<unsigned, 2> &TargetReg,
+ RecurrenceCycle &RC);
/// \brief If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
@@ -222,6 +246,28 @@ namespace {
}
};
+ /// \brief Helper class to hold instructions that are inside recurrence
+ /// cycles. The recurrence cycle is formulated around 1) a def operand and its
+ /// tied use operand, or 2) a def operand and a use operand that is commutable
+ /// with another use operand which is tied to the def operand. In the latter
+ /// case, the indices of the tied use operand and the commutable use operand
+ /// are maintained in CommutePair.
+ class RecurrenceInstr {
+ public:
+ typedef std::pair<unsigned, unsigned> IndexPair;
+
+ RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+ RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+ : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+ MachineInstr *getMI() const { return MI; }
+ Optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+ private:
+ MachineInstr *MI;
+ Optional<IndexPair> CommutePair;
+ };
+
/// \brief Helper class to hold a reply for ValueTracker queries. Contains the
/// returned sources for a given search and the instructions where the sources
/// were tracked from.
@@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+ unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ RecurrenceCycle &RC) {
+ // Recurrence found if Reg is in TargetRegs.
+ if (TargetRegs.count(Reg))
+ return true;
+
+ // TODO: Currently, we only allow the last instruction of the recurrence
+ // cycle (the instruction that feeds the PHI instruction) to have more than
+ // one use, to guarantee that commuting operands does not tie registers
+ // with overlapping live ranges. Once we have actual live range info for
+ // each register, this constraint can be relaxed.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ // Give up if the recurrence chain is longer than the limit.
+ if (RC.size() >= MaxRecurrenceChain)
+ return false;
+
+ MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+ // Only interested in recurrences whose instructions have only one def, which
+ // is a virtual register.
+ if (MI.getDesc().getNumDefs() != 1)
+ return false;
+
+ MachineOperand &DefOp = MI.getOperand(0);
+ if (!isVirtualRegisterOperand(DefOp))
+ return false;
+
+ // Check if the def operand of MI is tied to any use operand. We are only
+ // interested in the case where all the instructions in the recurrence chain
+ // have their def operand tied to one of their use operands.
+ unsigned TiedUseIdx;
+ if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+ return false;
+
+ if (Idx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ } else {
+ // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+ unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ }
+ }
+
+ return false;
+}
+
+/// \brief Phi instructions will eventually be lowered to copy instructions.
+/// If the phi is in a loop header, a recurrence may be formulated around the
+/// source and destination of the phi. In such cases, commuting the operands
+/// of the instructions in the recurrence may enable coalescing of the copy
+/// instruction generated from the phi. For example, if there is a recurrence
+/// of
+///
+/// LoopHeader:
+/// %vreg1 = phi(%vreg0, %vreg100)
+/// LoopLatch:
+/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+///
+/// then the fact that %vreg0 and %vreg2 are in the same tied-operand set
+/// makes coalescing the copy instruction generated from the phi in
+/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
+/// %vreg2 have overlapping live ranges. This introduces an additional move
+/// instruction in the final assembly. However, if we commute %vreg2 and
+/// %vreg1 in the ADD instruction, the redundant move instruction can be
+/// avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+ SmallSet<unsigned, 2> TargetRegs;
+ for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+ MachineOperand &MO = PHI.getOperand(Idx);
+ assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+ TargetRegs.insert(MO.getReg());
+ }
+
+ bool Changed = false;
+ RecurrenceCycle RC;
+ if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+ // Commute operands of the instructions in RC if necessary so that the copy
+ // to be generated from the PHI can be coalesced.
+ DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ for (auto &RI : RC) {
+ DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ auto CP = RI.getCommutePair();
+ if (CP) {
+ Changed = true;
+ TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+ (*CP).second);
+ DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+ MLI = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
@@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 4> CopySrcRegs;
DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
MII != MIE; ) {
MachineInstr *MI = &*MII;
@@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- if (MI->isPosition() || MI->isPHI())
+ if (MI->isPosition())
continue;
+ if (IsLoopHeader && MI->isPHI()) {
+ if (optimizeRecurrence(*MI)) {
+ Changed = true;
+ continue;
+ }
+ }
+
if (!MI->isCopy()) {
for (const auto &Op : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
@@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
-
+
// MI is replaced with FoldMI so we can continue trying to fold
Changed = true;
MI = FoldMI;
@@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
// If we run into an instruction we can't fold across, discard
// the load candidates. Note: We might be able to fold *into* this
// instruction, so this needs to be after the folding logic.
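
To see what optimizeRecurrence buys, here is the doc comment's example in
before/after form, using the same pseudo-MIR notation (register names are
illustrative):

    ; Before: %vreg2 is the tied use, so coalescing %vreg1 = COPY %vreg0
    ; would overlap %vreg1 with %vreg2, and an extra move survives.
    %vreg0<def, tied1> = ADD %vreg2<tied0>, %vreg1

    ; After commuting the two uses, %vreg1 becomes the tied use and the
    ; copy generated from the phi coalesces away.
    %vreg0<def, tied1> = ADD %vreg1<tied0>, %vreg2
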
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 50d241bff23d1..9562652556acb 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -2622,7 +2622,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
}
// If we couldn't allocate a register from spilling, there is probably some
- // invalid inline assembly. The base class wil report it.
+ // invalid inline assembly. The base class will report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
Depth);
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 7b3a5d5c5ff7f..ff9bca092dbe5 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -979,6 +979,11 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
for (LiveInterval::SubRange &SR : IntB.subranges())
SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+
+ // If the newly created instruction reuses the address of an instruction
+ // that was deleted earlier (the object was recycled by the allocator), it
+ // needs to be removed from the erased list.
+ ErasedInstrs.erase(NewCopyMI);
} else {
DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
<< MBB.getNumber() << '\t' << CopyMI);
@@ -989,6 +994,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// While updating the live-ranges, we only look at slot indices and
// never go back to the instruction.
LIS->RemoveMachineInstrFromMaps(CopyMI);
+ // Mark the instruction as deleted.
+ ErasedInstrs.insert(&CopyMI);
CopyMI.eraseFromParent();
// Update the liveness.
@@ -3095,7 +3102,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(CurrList[i])) {
+ if (ErasedInstrs.count(CurrList[i])) {
CurrList[i] = nullptr;
continue;
}
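
The two ErasedInstrs changes are a matched pair guarding against pointer
recycling; the sequence they protect, as a sketch (NewCopyMI stands for the
copy built earlier in this function):

    ErasedInstrs.insert(&CopyMI);   // mark the old copy as deleted
    CopyMI.eraseFromParent();       // its memory may now be recycled, so...

    // ...if the allocator later hands the same address to a new
    // instruction, the stale "deleted" marker must be cleared:
    ErasedInstrs.erase(NewCopyMI);
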
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index d2eff950d861a..bd5ecbd28f293 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -243,10 +243,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
unsigned VReg = Intervals[ID]->reg;
MO.setReg(VReg);
- if (MO.isTied()) {
+
+ if (MO.isTied() && Reg != VReg) {
/// Undef use operands are not tracked in the equivalence class but need
/// to be updated if they are tied.
MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
+
+ // substituteRegister breaks the iterator, so restart.
+ I = MRI->reg_nodbg_begin(Reg);
}
}
// TODO: We could attempt to recompute new register classes while visiting
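
The restart here is the usual defense against use-list iterator invalidation;
a generic sketch of the idiom (needsRewrite and rewriteOperand are
hypothetical placeholders):

    // Walk a register's non-debug uses, restarting whenever a rewrite may
    // have edited the use list out from under the iterator.
    for (auto I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end();
         I != E;) {
      MachineOperand &MO = *(I++);
      if (needsRewrite(MO)) {           // hypothetical predicate
        rewriteOperand(MO);             // hypothetical mutation
        I = MRI->reg_nodbg_begin(Reg);  // old iterators are stale: restart
      }
    }
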
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 7dd66d799be4a..0f70b0e9ca077 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1089,7 +1089,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
// Things that are available after the instruction are killed by it.
bool IsKill = LiveRegs.available(MRI, Reg);
MO.setIsKill(IsKill);
- if (IsKill && addToLiveRegs)
+ if (addToLiveRegs)
LiveRegs.addReg(Reg);
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d02dcb6f4439b..d901af7276860 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4915,7 +4915,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
@@ -8210,18 +8210,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
- if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getScalarSizeInBits();
-
- if (Amt < Size) {
- SDLoc SL(N);
- EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Amt = N0.getOperand(1);
+ KnownBits Known;
+ DAG.computeKnownBits(Amt, Known);
+ unsigned Size = VT.getScalarSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
- return DAG.getNode(ISD::SHL, SL, VT, Trunc,
- DAG.getConstant(Amt, SL, AmtVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ if (AmtVT != Amt.getValueType()) {
+ Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ AddToWorklist(Amt.getNode());
}
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
}
}
@@ -9751,6 +9753,52 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
+ // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
+ // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
+ if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
+ (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
+ TLI.isOperationLegal(ISD::FABS, VT)) {
+ SDValue Select = N0, X = N1;
+ if (Select.getOpcode() != ISD::SELECT)
+ std::swap(Select, X);
+
+ SDValue Cond = Select.getOperand(0);
+ auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
+ auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
+
+ if (TrueOpnd && FalseOpnd &&
+ Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
+ isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ std::swap(TrueOpnd, FalseOpnd);
+ // Fall through
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
+ TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FABS, DL, VT, X));
+ if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FABS, DL, VT, X);
+
+ break;
+ }
+ }
+ }
+
// FMUL -> FMA combines:
if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
@@ -12394,7 +12442,7 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
@@ -12414,8 +12462,8 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
if (IsLoadSrc)
- LBasePtr =
- BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
+ LBasePtr = BaseIndexOffset::match(
+ cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
@@ -12429,7 +12477,7 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsLoadSrc) {
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -12443,7 +12491,7 @@ void DAGCombiner::getStoreMergeCandidates(
if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
return false;
- Ptr = BaseIndexOffset::match(Other->getBasePtr());
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// We are looking for a root node which is an ancestor to all mergeable
@@ -12786,7 +12834,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
@@ -12829,6 +12877,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
@@ -12841,6 +12890,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
+
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
bool IsFastSt, IsFastLd;
@@ -12926,11 +12979,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
+ MachineMemOperand::Flags MMOFlags = isDereferenceable ?
+ MachineMemOperand::MODereferenceable:
+ MachineMemOperand::MONone;
+
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
+ FirstLoad->getPointerInfo(), FirstLoadAlign,
+ MMOFlags);
NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign);
@@ -12940,7 +12998,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NewLoad =
DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- JointMemOpVT, FirstLoadAlign);
+ JointMemOpVT, FirstLoadAlign, MMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), JointMemOpVT,
@@ -15013,6 +15071,11 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+ unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+ if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+ return SDValue();
+ unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
@@ -15034,11 +15097,10 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
- // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
- // power-of-2 truncations as they are the most likely.
- for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
- if (isTruncate(Scale))
- return DAG.getBitcast(VT, N00);
+ // We can remove *extend_vector_inreg only if the truncation happens at
+ // the same scale as the extension.
+ if (isTruncate(ExtScale))
+ return DAG.getBitcast(VT, N00);
return SDValue();
}
@@ -16540,8 +16602,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr());
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr());
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
int64_t PtrDiff;
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
@@ -16751,7 +16813,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -16777,7 +16839,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))
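
Of the DAGCombiner changes above, the TRUNCATE(SHL) one is the subtlest: the
shift amount no longer has to be a constant, only provably in range via known
bits. The bound check in isolation (a sketch, assuming llvm::KnownBits and
Log2_32 from Support/MathExtras.h):

    // A shift amount is provably < Size when all bits above log2(Size) are
    // known zero, i.e. its maximum active width is at most log2(Size) bits.
    static bool shiftAmountInRange(const llvm::KnownBits &Known,
                                   unsigned Size) {
      unsigned MaxActiveBits =
          Known.getBitWidth() - Known.countMinLeadingZeros();
      return MaxActiveBits <= llvm::Log2_32(Size);
    }
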
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d485..ac3247948169e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1827,11 +1827,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
-
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
@@ -1864,13 +1863,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
-
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
- SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
- return;
- }
-
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
@@ -1885,14 +1877,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
-
- SDValue Borrow;
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
- Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
- else
- Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
-
+ SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
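
The expansion being simplified computes the textbook double-word carry and
borrow; in plain C++ for 64-bit halves (a sketch of the identity, not of the
DAG code):

    #include <cstdint>

    // The low half wraps exactly when there is a carry out, which the
    // unsigned comparison (Lo < ALo) detects; this is the SETULT above.
    void addParts(uint64_t AHi, uint64_t ALo, uint64_t BHi, uint64_t BLo,
                  uint64_t &Hi, uint64_t &Lo) {
      Lo = ALo + BLo;
      uint64_t Carry = Lo < ALo ? 1 : 0; // materialized by the select
      Hi = AHi + BHi + Carry;
    }
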
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d2e0dbbf88ecd..4e899ae6668e7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -18,28 +19,41 @@ namespace llvm {
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
- // Obvious equivalent
+ // Initial Offset difference.
Off = Other.Offset - Offset;
- if (Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt)
- return true;
- // Match GlobalAddresses
- if (Index == Other.Index)
- if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base))
- if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+ // Trivial match.
+ if (Other.Base == Base)
+ return true;
+
+ // Match GlobalAddresses
+ if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
if (A->getGlobal() == B->getGlobal()) {
Off += B->getOffset() - A->getOffset();
return true;
}
- // TODO: we should be able to add FrameIndex analysis improvements here.
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // Match non-equal FrameIndexes - a FrameIndex stemming from an
+ // alloca will not have its ObjectOffset set until post-DAG, so
+ // we must assume the two FrameIndexes are incomparable.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+ if (!MFI.getObjectAllocation(A->getIndex()) &&
+ !MFI.getObjectAllocation(B->getIndex())) {
+ Off += MFI.getObjectOffset(B->getIndex()) -
+ MFI.getObjectOffset(A->getIndex());
+ return true;
+ }
+ }
return false;
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) {
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
// (((B + I*M) + c)) + c ...
SDValue Base = Ptr;
SDValue Index = SDValue();
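
Callers pair the two helpers the way the updated DAGCombiner sites do; the
shape of a typical aliasing query (a fragment, with Op0 and Op1 two memory
nodes):

    BaseIndexOffset Ptr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
    BaseIndexOffset Ptr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
    int64_t PtrDiff;
    if (Ptr0.equalBaseIndex(Ptr1, DAG, PtrDiff)) {
      // Same base and index (now also covering the GlobalAddress and
      // FrameIndex cases above); PtrDiff is the byte distance between them.
    }
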
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f9f431db55be3..acf68fbbdedfc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3375,7 +3375,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && VectorWidth) {
- MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
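
The MVT-to-EVT switch matters when the GEP index has an extended (non-simple)
integer type; a small sketch of the difference (i65 chosen as a width with no
simple MVT):

    #include "llvm/CodeGen/ValueTypes.h"
    #include <cassert>
    using namespace llvm;

    void sketch(LLVMContext &Ctx) {
      EVT EltVT = EVT::getIntegerVT(Ctx, 65);      // extended: no MVT::i65
      EVT VecVT = EVT::getVectorVT(Ctx, EltVT, 4); // fine through EVT
      assert(!EltVT.isSimple() && VecVT.getVectorNumElements() == 4);
      // MVT::getVectorVT(EltVT.getSimpleVT(), 4) would assert here.
    }
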
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index b1918b19e1dfe..817e58ce59e10 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -1,4 +1,4 @@
-//===-- TargetPassConfig.cpp - Target independent code generation passes --===//
+//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,29 +13,37 @@
//===---------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetPassConfig.h"
-
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -225,6 +233,7 @@ char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
namespace {
+
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
@@ -245,9 +254,11 @@ struct InsertedPass {
return NP;
}
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
class PassConfigImpl {
public:
// List of passes explicitly substituted by this target. Normally this is
@@ -263,7 +274,8 @@ public:
/// is inserted after each instance of the first one.
SmallVector<InsertedPass, 4> InsertedPasses;
};
-} // namespace llvm
+
+} // end namespace llvm
// Out of line virtual method.
TargetPassConfig::~TargetPassConfig() {
@@ -273,11 +285,7 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
- AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true),
- RequireCodeGenSCCOrder(false) {
-
+ : ImmutablePass(ID), PM(&pm), TM(&TM) {
Impl = new PassConfigImpl();
// Register all target independent codegen passes to activate their PassIDs,
@@ -325,7 +333,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(nullptr) {
+ : ImmutablePass(ID) {
report_fatal_error("Trying to construct TargetPassConfig without a target "
"machine. Scheduling a CodeGen pass without a target "
"triple set?");
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 552a89f76ca21..83c00e24d14fc 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule",
cl::desc("Coalesce copies by rescheduling (default=true)"),
cl::init(true), cl::Hidden);
+// Limit the number of dataflow edges to traverse when evaluating the benefit
+// of commuting operands.
+static cl::opt<unsigned> MaxDataFlowEdge(
+ "dataflow-edge-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum number of dataflow edges to traverse when evaluating "
+ "the benefit of commuting operands"));
+
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
@@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
- if (isRevCopyChain(regC, regA, 3))
+ if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
return true;
- if (isRevCopyChain(regB, regA, 3))
+ if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute
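
Replacing the hard-coded depth of 3 with a cl::opt lets the heuristic be tuned
without rebuilding, e.g. (flag as defined above; the invocation itself is
illustrative):

    llc -dataflow-edge-limit=5 input.ll
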