summaryrefslogtreecommitdiff
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp20
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp2
-rw-r--r--lib/CodeGen/CMakeLists.txt1
-rw-r--r--lib/CodeGen/CodeGen.cpp1
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp612
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp2
-rw-r--r--lib/CodeGen/GlobalISel/Localizer.cpp10
-rw-r--r--lib/CodeGen/ImplicitNullChecks.cpp16
-rw-r--r--lib/CodeGen/LiveRangeShrink.cpp231
-rw-r--r--lib/CodeGen/MIRParser/MIRParser.cpp2
-rw-r--r--lib/CodeGen/MIRPrinter.cpp2
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp7
-rw-r--r--lib/CodeGen/MachineInstr.cpp86
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp65
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp132
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp70
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h1
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp278
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp8
-rw-r--r--lib/CodeGen/TargetPassConfig.cpp17
22 files changed, 1289 insertions, 277 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 43b245c66400d..5abf50e5bd10c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -165,7 +165,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
++I) {
unsigned Reg = *I;
- if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg)))
+ if (!IsReturnBlock && !Pristine.test(Reg))
continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d72cf59229874..e61e22abe82a7 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -949,6 +949,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
+static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
+ MachineModuleInfo *MMI) {
+ if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo())
+ return true;
+
+ // We might emit an EH table that uses function begin and end labels even if
+ // we don't have any landingpads.
+ if (!MF.getFunction()->hasPersonalityFn())
+ return false;
+ return !isNoOpWithoutInvoke(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+}
+
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
@@ -1076,8 +1089,8 @@ void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
- if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
+ if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) ||
+ MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->EmitLabel(CurrentFnEnd);
@@ -1402,8 +1415,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() ||
- MF.hasEHFunclets() || NeedsLocalForSize) {
+ if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 0a4a7a06cb2e7..e14d5be1177a6 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) {
+ if (SawPotentiallyThrowing && !IsSJLJ) {
CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 2b5863aa58009..55a27e2fb79e5 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -49,6 +49,7 @@ add_llvm_library(LLVMCodeGen
LivePhysRegs.cpp
LiveRangeCalc.cpp
LiveRangeEdit.cpp
+ LiveRangeShrink.cpp
LiveRegMatrix.cpp
LiveRegUnits.cpp
LiveStackAnalysis.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 2a2715beaadca..4d30c6574b121 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -43,6 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
+ initializeLiveRangeShrinkPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 4e85708efafc1..568b278dd47cb 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -24,12 +24,13 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -60,6 +61,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -84,6 +86,12 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
+STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
+STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
+STATISTIC(NumMemCmpGreaterThanMax,
+ "Number of memcmp calls with size greater than max size");
+STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
+
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
@@ -144,6 +152,11 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."), cl::init(true));
+static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+ "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
+ cl::desc("The number of loads per basic block for inline expansion of "
+ "memcmp that is only being compared against zero."));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -1629,6 +1642,593 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
return true;
}
+// This class provides helper functions to expand a memcmp library call into an
+// inline expansion.
+class MemCmpExpansion {
+ struct ResultBlock {
+ BasicBlock *BB;
+ PHINode *PhiSrc1;
+ PHINode *PhiSrc2;
+ ResultBlock();
+ };
+
+ CallInst *CI;
+ ResultBlock ResBlock;
+ unsigned MaxLoadSize;
+ unsigned NumBlocks;
+ unsigned NumBlocksNonOneByte;
+ unsigned NumLoadsPerBlock;
+ std::vector<BasicBlock *> LoadCmpBlocks;
+ BasicBlock *EndBlock;
+ PHINode *PhiRes;
+ bool IsUsedForZeroCmp;
+ int calculateNumBlocks(unsigned Size);
+ void createLoadCmpBlocks();
+ void createResultBlock();
+ void setupResultBlockPHINodes();
+ void setupEndBlockPHINodes();
+ void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex,
+ bool IsLittleEndian);
+ void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
+ unsigned &NumBytesProcessed);
+ void emitLoadCompareByteBlock(unsigned Index, int GEPIndex);
+ void emitMemCmpResultBlock(bool IsLittleEndian);
+ Value *getMemCmpExpansionZeroCase(unsigned Size, bool IsLittleEndian);
+ unsigned getLoadSize(unsigned Size);
+ unsigned getNumLoads(unsigned Size);
+
+public:
+ MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize,
+ unsigned NumLoadsPerBlock);
+ Value *getMemCmpExpansion(bool IsLittleEndian);
+};
+
+MemCmpExpansion::ResultBlock::ResultBlock()
+ : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {}
+
+// Initialize the basic block structure required for expansion of memcmp call
+// with given maximum load size and memcmp size parameter.
+// This structure includes:
+// 1. A list of load compare blocks - LoadCmpBlocks.
+// 2. An EndBlock, split from original instruction point, which is the block to
+// return from.
+// 3. ResultBlock, block to branch to for early exit when a
+// LoadCmpBlock finds a difference.
+MemCmpExpansion::MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize,
+ unsigned NumLoadsPerBlock)
+ : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(NumLoadsPerBlock) {
+
+ IRBuilder<> Builder(CI->getContext());
+
+ BasicBlock *StartBlock = CI->getParent();
+ EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+ setupEndBlockPHINodes();
+ IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+
+ ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ uint64_t Size = SizeCast->getZExtValue();
+
+ // Calculate how many load compare blocks are required for an expansion of
+ // given Size.
+ NumBlocks = calculateNumBlocks(Size);
+ createResultBlock();
+
+ // If return value of memcmp is not used in a zero equality, we need to
+ // calculate which source was larger. The calculation requires the
+ // two loaded source values of each load compare block.
+ // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+ if (!IsUsedForZeroCmp)
+ setupResultBlockPHINodes();
+
+ // Create the number of required load compare basic blocks.
+ createLoadCmpBlocks();
+
+ // Update the terminator added by splitBasicBlock to branch to the first
+ // LoadCmpBlock.
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+ StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+}
+
+void MemCmpExpansion::createLoadCmpBlocks() {
+ for (unsigned i = 0; i < NumBlocks; i++) {
+ BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
+ EndBlock->getParent(), EndBlock);
+ LoadCmpBlocks.push_back(BB);
+ }
+}
+
+void MemCmpExpansion::createResultBlock() {
+ ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
+ EndBlock->getParent(), EndBlock);
+}
+
+// This function creates the IR instructions for loading and comparing 1 byte.
+// It loads 1 byte from each source of the memcmp paramters with the given
+// GEPIndex. It then subtracts the two loaded values and adds this result to the
+// final phi node for selecting the memcmp result.
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) {
+ IRBuilder<> Builder(CI->getContext());
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+ Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
+ // Cast source to LoadSizeType*
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using the GEPIndex
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
+ Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+
+ PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]);
+
+ if (Index < (LoadCmpBlocks.size() - 1)) {
+ // Early exit branch if difference found to EndBlock, otherwise continue to
+ // next LoadCmpBlock
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
+ ConstantInt::get(Diff->getType(), 0));
+ BranchInst *CmpBr =
+ BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp);
+ Builder.Insert(CmpBr);
+ } else {
+ // The last block has an unconditional branch to EndBlock
+ BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
+ }
+}
+
+unsigned MemCmpExpansion::getNumLoads(unsigned Size) {
+ return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize);
+}
+
+unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
+ return MinAlign(PowerOf2Floor(Size), MaxLoadSize);
+}
+
+void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
+ unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
+
+ IRBuilder<> Builder(CI->getContext());
+
+ std::vector<Value *> XorList, OrList;
+ Value *Diff;
+
+ unsigned RemainingBytes = Size - NumBytesProcessed;
+ unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
+ unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ unsigned LoadSize = getLoadSize(RemainingBytes);
+ unsigned GEPIndex = NumBytesProcessed / LoadSize;
+ NumBytesProcessed += LoadSize;
+ RemainingBytes -= LoadSize;
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using the GEPIndex
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ // Load LoadSizeType from the base address
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+ if (LoadSizeType != MaxLoadType) {
+ LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+ }
+ Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+ Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
+ XorList.push_back(Diff);
+ }
+
+ auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
+ std::vector<Value *> OutList;
+ for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
+ Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
+ OutList.push_back(Or);
+ }
+ if (InList.size() % 2 != 0)
+ OutList.push_back(InList.back());
+ return OutList;
+ };
+
+ // Pair wise OR the XOR results
+ OrList = pairWiseOr(XorList);
+
+ // Pair wise OR the OR results until one result left
+ while (OrList.size() != 1) {
+ OrList = pairWiseOr(OrList);
+ }
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0],
+ ConstantInt::get(Diff->getType(), 0));
+ BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[Index + 1];
+ // Early exit branch if difference found to ResultBlock, otherwise continue to
+ // next LoadCmpBlock or EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ Builder.Insert(CmpBr);
+
+ // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes)
+ if (Index == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
+ }
+}
+
+// This function creates the IR intructions for loading and comparing using the
+// given LoadSize. It loads the number of bytes specified by LoadSize from each
+// source of the memcmp parameters. It then does a subtract to see if there was
+// a difference in the loaded values. If a difference is found, it branches
+// with an early exit to the ResultBlock for calculating which source was
+// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
+// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
+// a special case through emitLoadCompareByteBlock. The special handling can
+// simply subtract the loaded values and add it to the result phi node.
+void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
+ int GEPIndex, bool IsLittleEndian) {
+ if (LoadSize == 1) {
+ MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
+ return;
+ }
+
+ IRBuilder<> Builder(CI->getContext());
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+ // Cast source to LoadSizeType*
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using the GEPIndex
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ // Load LoadSizeType from the base address
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (IsLittleEndian) {
+ Function *F = LoadCmpBlocks[Index]->getParent();
+
+ Function *Bswap = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ if (LoadSizeType != MaxLoadType) {
+ LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+ }
+
+ // Add the loaded values to the phi nodes for calculating memcmp result only
+ // if result is not used in a zero equality.
+ if (!IsUsedForZeroCmp) {
+ ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]);
+ ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
+ }
+
+ Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
+ ConstantInt::get(Diff->getType(), 0));
+ BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[Index + 1];
+ // Early exit branch if difference found to ResultBlock, otherwise continue to
+ // next LoadCmpBlock or EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ Builder.Insert(CmpBr);
+
+ // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes)
+ if (Index == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
+ }
+}
+
+// This function populates the ResultBlock with a sequence to calculate the
+// memcmp result. It compares the two loaded source values and returns -1 if
+// src1 < src2 and 1 if src1 > src2.
+void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) {
+ IRBuilder<> Builder(CI->getContext());
+
+ // Special case: if memcmp result is used in a zero equality, result does not
+ // need to be calculated and can simply return 1.
+ if (IsUsedForZeroCmp) {
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+ Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ return;
+ }
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
+ ResBlock.PhiSrc2);
+
+ Value *Res =
+ Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+}
+
+int MemCmpExpansion::calculateNumBlocks(unsigned Size) {
+ int NumBlocks = 0;
+ bool haveOneByteLoad = false;
+ unsigned RemainingSize = Size;
+ unsigned LoadSize = MaxLoadSize;
+ while (RemainingSize) {
+ if (LoadSize == 1)
+ haveOneByteLoad = true;
+ NumBlocks += RemainingSize / LoadSize;
+ RemainingSize = RemainingSize % LoadSize;
+ LoadSize = LoadSize / 2;
+ }
+ NumBlocksNonOneByte = haveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
+
+ if (IsUsedForZeroCmp)
+ NumBlocks = NumBlocks / NumLoadsPerBlock +
+ (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0);
+
+ return NumBlocks;
+}
+
+void MemCmpExpansion::setupResultBlockPHINodes() {
+ IRBuilder<> Builder(CI->getContext());
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ Builder.SetInsertPoint(ResBlock.BB);
+ ResBlock.PhiSrc1 =
+ Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1");
+ ResBlock.PhiSrc2 =
+ Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2");
+}
+
+void MemCmpExpansion::setupEndBlockPHINodes() {
+ IRBuilder<> Builder(CI->getContext());
+
+ Builder.SetInsertPoint(&EndBlock->front());
+ PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
+}
+
+Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size,
+ bool IsLittleEndian) {
+ unsigned NumBytesProcessed = 0;
+ // This loop populates each of the LoadCmpBlocks with IR sequence to handle
+ // multiple loads per block
+ for (unsigned i = 0; i < NumBlocks; ++i) {
+ emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed);
+ }
+
+ emitMemCmpResultBlock(IsLittleEndian);
+ return PhiRes;
+}
+
+// This function expands the memcmp call into an inline expansion and returns
+// the memcmp result.
+Value *MemCmpExpansion::getMemCmpExpansion(bool IsLittleEndian) {
+
+ ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ uint64_t Size = SizeCast->getZExtValue();
+
+ int LoadSize = MaxLoadSize;
+ int NumBytesToBeProcessed = Size;
+
+ if (IsUsedForZeroCmp) {
+ return getMemCmpExpansionZeroCase(Size, IsLittleEndian);
+ }
+
+ unsigned Index = 0;
+ // This loop calls emitLoadCompareBlock for comparing SizeVal bytes of the two
+ // memcmp source. It starts with loading using the maximum load size set by
+ // the target. It processes any remaining bytes using a load size which is the
+ // next smallest power of 2.
+ while (NumBytesToBeProcessed) {
+ // Calculate how many blocks we can create with the current load size
+ int NumBlocks = NumBytesToBeProcessed / LoadSize;
+ int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
+ NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
+
+ // For each NumBlocks, populate the instruction sequence for loading and
+ // comparing LoadSize bytes
+ while (NumBlocks--) {
+ emitLoadCompareBlock(Index, LoadSize, GEPIndex, IsLittleEndian);
+ Index++;
+ GEPIndex++;
+ }
+ // Get the next LoadSize to use
+ LoadSize = LoadSize / 2;
+ }
+
+ emitMemCmpResultBlock(IsLittleEndian);
+ return PhiRes;
+}
+
+// This function checks to see if an expansion of memcmp can be generated.
+// It checks for constant compare size that is less than the max inline size.
+// If an expansion cannot occur, returns false to leave as a library call.
+// Otherwise, the library call is replaced wtih new IR instruction sequence.
+/// We want to transform:
+/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
+/// To:
+/// loadbb:
+/// %0 = bitcast i32* %buffer2 to i8*
+/// %1 = bitcast i32* %buffer1 to i8*
+/// %2 = bitcast i8* %1 to i64*
+/// %3 = bitcast i8* %0 to i64*
+/// %4 = load i64, i64* %2
+/// %5 = load i64, i64* %3
+/// %6 = call i64 @llvm.bswap.i64(i64 %4)
+/// %7 = call i64 @llvm.bswap.i64(i64 %5)
+/// %8 = sub i64 %6, %7
+/// %9 = icmp ne i64 %8, 0
+/// br i1 %9, label %res_block, label %loadbb1
+/// res_block: ; preds = %loadbb2,
+/// %loadbb1, %loadbb
+/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
+/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
+/// %10 = icmp ult i64 %phi.src1, %phi.src2
+/// %11 = select i1 %10, i32 -1, i32 1
+/// br label %endblock
+/// loadbb1: ; preds = %loadbb
+/// %12 = bitcast i32* %buffer2 to i8*
+/// %13 = bitcast i32* %buffer1 to i8*
+/// %14 = bitcast i8* %13 to i32*
+/// %15 = bitcast i8* %12 to i32*
+/// %16 = getelementptr i32, i32* %14, i32 2
+/// %17 = getelementptr i32, i32* %15, i32 2
+/// %18 = load i32, i32* %16
+/// %19 = load i32, i32* %17
+/// %20 = call i32 @llvm.bswap.i32(i32 %18)
+/// %21 = call i32 @llvm.bswap.i32(i32 %19)
+/// %22 = zext i32 %20 to i64
+/// %23 = zext i32 %21 to i64
+/// %24 = sub i64 %22, %23
+/// %25 = icmp ne i64 %24, 0
+/// br i1 %25, label %res_block, label %loadbb2
+/// loadbb2: ; preds = %loadbb1
+/// %26 = bitcast i32* %buffer2 to i8*
+/// %27 = bitcast i32* %buffer1 to i8*
+/// %28 = bitcast i8* %27 to i16*
+/// %29 = bitcast i8* %26 to i16*
+/// %30 = getelementptr i16, i16* %28, i16 6
+/// %31 = getelementptr i16, i16* %29, i16 6
+/// %32 = load i16, i16* %30
+/// %33 = load i16, i16* %31
+/// %34 = call i16 @llvm.bswap.i16(i16 %32)
+/// %35 = call i16 @llvm.bswap.i16(i16 %33)
+/// %36 = zext i16 %34 to i64
+/// %37 = zext i16 %35 to i64
+/// %38 = sub i64 %36, %37
+/// %39 = icmp ne i64 %38, 0
+/// br i1 %39, label %res_block, label %loadbb3
+/// loadbb3: ; preds = %loadbb2
+/// %40 = bitcast i32* %buffer2 to i8*
+/// %41 = bitcast i32* %buffer1 to i8*
+/// %42 = getelementptr i8, i8* %41, i8 14
+/// %43 = getelementptr i8, i8* %40, i8 14
+/// %44 = load i8, i8* %42
+/// %45 = load i8, i8* %43
+/// %46 = zext i8 %44 to i32
+/// %47 = zext i8 %45 to i32
+/// %48 = sub i32 %46, %47
+/// br label %endblock
+/// endblock: ; preds = %res_block,
+/// %loadbb3
+/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
+/// ret i32 %phi.res
+static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
+ const TargetLowering *TLI, const DataLayout *DL) {
+ NumMemCmpCalls++;
+ IRBuilder<> Builder(CI->getContext());
+
+ // TTI call to check if target would like to expand memcmp and get the
+ // MaxLoadSize
+ unsigned MaxLoadSize;
+ if (!TTI->expandMemCmp(CI, MaxLoadSize))
+ return false;
+
+ // Early exit from expansion if -Oz
+ if (CI->getParent()->getParent()->optForMinSize()) {
+ return false;
+ }
+
+ // Early exit from expansion if size is not a constant
+ ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeCast) {
+ NumMemCmpNotConstant++;
+ return false;
+ }
+
+ // Early exit from expansion if size greater than max bytes to load
+ uint64_t SizeVal = SizeCast->getZExtValue();
+
+ unsigned NumLoads = 0;
+ unsigned RemainingSize = SizeVal;
+ unsigned LoadSize = MaxLoadSize;
+ while (RemainingSize) {
+ NumLoads += RemainingSize / LoadSize;
+ RemainingSize = RemainingSize % LoadSize;
+ LoadSize = LoadSize / 2;
+ }
+
+ if (NumLoads >
+ TLI->getMaxExpandSizeMemcmp(CI->getParent()->getParent()->optForSize())) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
+
+ NumMemCmpInlined++;
+
+ // MemCmpHelper object, creates and sets up basic blocks required for
+ // expanding memcmp with size SizeVal
+ unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock;
+ MemCmpExpansion MemCmpHelper(CI, MaxLoadSize, NumLoadsPerBlock);
+
+ Value *Res = MemCmpHelper.getMemCmpExpansion(DL->isLittleEndian());
+
+ // Replace call with result of expansion and erarse call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+
+ return true;
+}
+
bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
BasicBlock *BB = CI->getParent();
@@ -1780,6 +2380,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
CI->eraseFromParent();
return true;
}
+
+ LibFunc Func;
+ if (TLInfo->getLibFunc(*CI->getCalledFunction(), Func) &&
+ Func == LibFunc_memcmp) {
+ if (expandMemCmp(CI, TTI, TLI, DL)) {
+ ModifiedDT = true;
+ return true;
+ }
+ }
return false;
}
@@ -4927,6 +5536,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
return true;
}
+
namespace {
/// \brief Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index b2d6652b075e7..a3cf2846d2f5d 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -74,7 +74,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
++I) {
unsigned Reg = *I;
- if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg)))
+ if (!IsReturnBlock && !Pristine.test(Reg))
continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index c2a568e4b4521..c5d0999fe4388 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -98,12 +98,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
// Create the localized instruction.
MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
LocalizedInstrs.insert(LocalizedMI);
- // Move it at the right place.
- MachineInstr &MIUse = *MOUse.getParent();
- if (MIUse.getParent() == InsertMBB)
- InsertMBB->insert(MIUse, LocalizedMI);
- else
- InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI);
+ // Don't try to be smart for the insertion point.
+ // There is no guarantee that the first seen use is the first
+ // use in the block.
+ InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI);
// Set a new register for the definition.
unsigned NewReg =
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 24e289dd4f1b0..444416a77008c 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -607,8 +607,20 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr(
.addMBB(HandlerMBB)
.addImm(MI->getOpcode());
- for (auto &MO : MI->uses())
- MIB.add(MO);
+ for (auto &MO : MI->uses()) {
+ if (MO.isReg()) {
+ MachineOperand NewMO = MO;
+ if (MO.isUse()) {
+ NewMO.setIsKill(false);
+ } else {
+ assert(MO.isDef() && "Expected def or use");
+ NewMO.setIsDead(false);
+ }
+ MIB.add(NewMO);
+ } else {
+ MIB.add(MO);
+ }
+ }
MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
new file mode 100644
index 0000000000000..552f4b5393fef
--- /dev/null
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -0,0 +1,231 @@
+//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+///===---------------------------------------------------------------------===//
+///
+/// \file
+/// This pass moves instructions close to the definition of its operands to
+/// shrink live range of the def instruction. The code motion is limited within
+/// the basic block. The moved instruction should have 1 def, and more than one
+/// uses, all of which are the only use of the def.
+///
+///===---------------------------------------------------------------------===//
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lrshrink"
+
+STATISTIC(NumInstrsHoistedToShrinkLiveRange,
+ "Number of insructions hoisted to shrink live range.");
+
+using namespace llvm;
+
+namespace {
+class LiveRangeShrink : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveRangeShrink() : MachineFunctionPass(ID) {
+ initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Live Range Shrink"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char LiveRangeShrink::ID = 0;
+char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
+
+INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
+ false)
+namespace {
+typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
+
+/// Returns \p New if it's dominated by \p Old, otherwise return \p Old.
+/// \p M maintains a map from instruction to its dominating order that satisfies
+/// M[A] > M[B] guarantees that A is dominated by B.
+/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return
+/// \p New.
+MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
+ const InstOrderMap &M) {
+ auto NewIter = M.find(&New);
+ if (NewIter == M.end())
+ return Old;
+ if (Old == nullptr)
+ return &New;
+ unsigned OrderOld = M.find(Old)->second;
+ unsigned OrderNew = NewIter->second;
+ if (OrderOld != OrderNew)
+ return OrderOld < OrderNew ? &New : Old;
+ // OrderOld == OrderNew, we need to iterate down from Old to see if it
+ // can reach New, if yes, New is dominated by Old.
+ for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew;
+ I = I->getNextNode())
+ if (I == &New)
+ return &New;
+ return Old;
+}
+
+/// Builds Instruction to its dominating order number map \p M by traversing
+/// from instruction \p Start.
+void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
+ M.clear();
+ unsigned i = 0;
+ for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
+ M[&I] = i++;
+}
+} // end anonymous namespace
+
+bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ InstOrderMap IOM;
+ // Map from register to instruction order (value of IOM) where the
+ // register is used last. When moving instructions up, we need to
+ // make sure all its defs (including dead def) will not cross its
+ // last use when moving up.
+ DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ bool SawStore = false;
+ BuildInstOrderMap(MBB.begin(), IOM);
+ UseMap.clear();
+
+ for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
+ MachineInstr &MI = *Next;
+ ++Next;
+ if (MI.isPHI() || MI.isDebugValue())
+ continue;
+ if (MI.mayStore())
+ SawStore = true;
+
+ unsigned CurrentOrder = IOM[&MI];
+ unsigned Barrier = 0;
+ MachineInstr *BarrierMI = nullptr;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDebug())
+ continue;
+ if (MO.isUse())
+ UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
+ else if (MO.isDead() && UseMap.count(MO.getReg()))
+ // Barrier is the last instruction where MO get used. MI should not
+ // be moved above Barrier.
+ if (Barrier < UseMap[MO.getReg()].first) {
+ Barrier = UseMap[MO.getReg()].first;
+ BarrierMI = UseMap[MO.getReg()].second;
+ }
+ }
+
+ if (!MI.isSafeToMove(nullptr, SawStore)) {
+ // If MI has side effects, it should become a barrier for code motion.
+ // IOM is rebuild from the next instruction to prevent later
+ // instructions from being moved before this MI.
+ if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+ BuildInstOrderMap(Next, IOM);
+ SawStore = false;
+ }
+ continue;
+ }
+
+ const MachineOperand *DefMO = nullptr;
+ MachineInstr *Insert = nullptr;
+
+ // Number of live-ranges that will be shortened. We do not count
+ // live-ranges that are defined by a COPY as it could be coalesced later.
+ unsigned NumEligibleUse = 0;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDead() || MO.isDebug())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Do not move the instruction if it def/uses a physical register,
+ // unless it is a constant physical register or a noreg.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Reg || MRI.isConstantPhysReg(Reg))
+ continue;
+ Insert = nullptr;
+ break;
+ }
+ if (MO.isDef()) {
+ // Do not move if there is more than one def.
+ if (DefMO) {
+ Insert = nullptr;
+ break;
+ }
+ DefMO = &MO;
+ } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
+ MRI.getRegClass(DefMO->getReg()) ==
+ MRI.getRegClass(MO.getReg())) {
+ // The heuristic does not handle different register classes yet
+ // (registers of different sizes, looser/tighter constraints). This
+ // is because it needs more accurate model to handle register
+ // pressure correctly.
+ MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
+ if (!DefInstr.isCopy())
+ NumEligibleUse++;
+ Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
+ } else {
+ Insert = nullptr;
+ break;
+ }
+ }
+
+ // If Barrier equals IOM[I], traverse forward to find if BarrierMI is
+ // after Insert, if yes, then we should not hoist.
+ for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
+ I = I->getNextNode())
+ if (I == BarrierMI) {
+ Insert = nullptr;
+ break;
+ }
+ // Move the instruction when # of shrunk live range > 1.
+ if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
+ MachineBasicBlock::iterator I = std::next(Insert->getIterator());
+ // Skip all the PHI and debug instructions.
+ while (I != MBB.end() && (I->isPHI() || I->isDebugValue()))
+ I = std::next(I);
+ if (I == MI.getIterator())
+ continue;
+
+ // Update the dominator order to be the same as the insertion point.
+ // We do this to maintain a non-decreasing order without need to update
+ // all instruction orders after the insertion point.
+ unsigned NewOrder = IOM[&*I];
+ IOM[&MI] = NewOrder;
+ NumInstrsHoistedToShrinkLiveRange++;
+
+ // Find MI's debug value following MI.
+ MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
+ if (MI.getOperand(0).isReg())
+ for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
+ EndIter->getOperand(0).isReg() &&
+ EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
+ ++EndIter, ++Next)
+ IOM[&*EndIter] = NewOrder;
+ MBB.splice(I, &MBB, MI.getIterator(), EndIter);
+ }
+ }
+ }
+ return false;
+}
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index bd04acd049dba..ff12297e3fc67 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -332,8 +332,6 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
- if (YamlMF.NoVRegs)
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
if (YamlMF.RegBankSelected)
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 6f6a67d81b0fe..293fc7358b8ed 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -183,8 +183,6 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
- YamlMF.NoVRegs = MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::NoVRegs);
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
YamlMF.RegBankSelected = MF.getProperties().hasProperty(
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 06112723497b0..590acc01008a6 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -350,6 +350,13 @@ void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
LiveIns.erase(I);
}
+MachineBasicBlock::livein_iterator
+MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
+ // Get non-const version of iterator.
+ LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin());
+ return LiveIns.erase(LI);
+}
+
bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
livein_iterator I = find_if(
LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index d665201a5d17c..306b75dbbae7e 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,21 +11,34 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -35,9 +48,13 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -45,6 +62,14 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
static cl::opt<bool> PrintWholeRegMask(
@@ -256,7 +281,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_GlobalAddress:
return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
case MachineOperand::MO_ExternalSymbol:
- return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
return getBlockAddress() == Other.getBlockAddress() &&
@@ -723,9 +748,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
- : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
- AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
- debugLoc(std::move(dl)) {
+ : MCID(&tid), debugLoc(std::move(dl)) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -742,9 +765,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
- Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs),
- MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
+ debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -1633,8 +1655,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
// memory objects. It can save compile time, and possibly catch some
// corner cases not currently covered.
- assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
- assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
@@ -1667,7 +1689,7 @@ bool MachineInstr::hasOrderedMemoryRef() const {
return true;
// Check if any of our memory operands are ordered.
- return any_of(memoperands(), [](const MachineMemOperand *MMO) {
+ return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
return !MMO->isUnordered();
});
}
@@ -1841,7 +1863,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
return;
// Print the rest of the operands.
- bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
unsigned AsmDescOp = ~0u;
unsigned AsmOpCount = 0;
@@ -1878,31 +1899,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegs.push_back(MO.getReg());
- // Omit call-clobbered registers which aren't used anywhere. This makes
- // call instructions much less noisy on targets where calls clobber lots
- // of registers. Don't rely on MO.isDead() because we may be called before
- // LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MRI && isCall() &&
- MO.isReg() && MO.isImplicit() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (MRI->use_empty(Reg)) {
- bool HasAliasLive = false;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- unsigned AliasReg = *AI;
- if (!MRI->use_empty(AliasReg)) {
- HasAliasLive = true;
- break;
- }
- }
- if (!HasAliasLive) {
- OmittedAnyCallClobbers = true;
- continue;
- }
- }
- }
- }
-
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
if (i < getDesc().NumOperands) {
@@ -1984,12 +1980,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
MO.print(OS, MST, TRI);
}
- // Briefly indicate whether any call clobbers were omitted.
- if (OmittedAnyCallClobbers) {
- if (!FirstOp) OS << ",";
- OS << " ...";
- }
-
bool HaveSemi = false;
const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
@@ -2255,8 +2245,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// If there are no uses, including partial uses, the def is dead.
- if (none_of(UsedRegs,
- [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ if (llvm::none_of(UsedRegs,
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
MO.setIsDead();
}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 6cf751d34e268..c1b72430e6053 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -7,27 +7,34 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionInitializer.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <utility>
+#include <vector>
+
using namespace llvm;
using namespace llvm::dwarf;
@@ -37,14 +44,16 @@ INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
-MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
namespace llvm {
+
class MMIAddrLabelMapCallbackPtr final : CallbackVH {
- MMIAddrLabelMap *Map;
+ MMIAddrLabelMap *Map = nullptr;
+
public:
- MMIAddrLabelMapCallbackPtr() : Map(nullptr) {}
- MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {}
+ MMIAddrLabelMapCallbackPtr() = default;
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
void setPtr(BasicBlock *BB) {
ValueHandleBase::operator=(BB);
@@ -75,11 +84,12 @@ class MMIAddrLabelMap {
/// This is a per-function list of symbols whose corresponding BasicBlock got
/// deleted. These symbols need to be emitted at some point in the file, so
/// AsmPrinter emits them after the function body.
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>
DeletedAddrLabelsNeedingEmission;
-public:
+public:
MMIAddrLabelMap(MCContext &context) : Context(context) {}
+
~MMIAddrLabelMap() {
assert(DeletedAddrLabelsNeedingEmission.empty() &&
"Some labels for deleted blocks never got emitted");
@@ -93,7 +103,8 @@ public:
void UpdateForDeletedBlock(BasicBlock *BB);
void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
};
-}
+
+} // end namespace llvm
ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
@@ -119,7 +130,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
/// If we have any deleted symbols for F, return them.
void MMIAddrLabelMap::
takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I =
DeletedAddrLabelsNeedingEmission.find(F);
// If there are no entries for the function, just return.
@@ -130,7 +141,6 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
DeletedAddrLabelsNeedingEmission.erase(I);
}
-
void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
// If the block got deleted, there is no need for the symbol. If the symbol
// was already emitted, we can just forget about it, otherwise we need to
@@ -177,7 +187,6 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
OldEntry.Symbols.end());
}
-
void MMIAddrLabelMapCallbackPtr::deleted() {
Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
}
@@ -186,9 +195,6 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-
-//===----------------------------------------------------------------------===//
-
MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
: ImmutablePass(ID), TM(*TM),
Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
@@ -196,11 +202,9 @@ MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
-MachineModuleInfo::~MachineModuleInfo() {
-}
+MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
-
ObjFileMMI = nullptr;
CurCallSite = 0;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
@@ -211,7 +215,6 @@ bool MachineModuleInfo::doInitialization(Module &M) {
}
bool MachineModuleInfo::doFinalization(Module &M) {
-
Personalities.clear();
delete AddrLabelSymbols;
@@ -290,10 +293,12 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
}
namespace {
+
/// This pass frees the MachineFunction object associated with a Function.
class FreeMachineFunction : public FunctionPass {
public:
static char ID;
+
FreeMachineFunction() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -311,14 +316,14 @@ public:
return "Free MachineFunction";
}
};
-char FreeMachineFunction::ID;
+
} // end anonymous namespace
-namespace llvm {
-FunctionPass *createFreeMachineFunctionPass() {
+char FreeMachineFunction::ID;
+
+FunctionPass *llvm::createFreeMachineFunctionPass() {
return new FreeMachineFunction();
}
-} // end namespace llvm
//===- MMI building helpers -----------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ab36bc1417aee..fb51a4eb14212 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -280,6 +280,7 @@ namespace {
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSETCCE(SDNode *N);
+ SDValue visitSETCCCARRY(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
@@ -1457,6 +1458,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SETCCE: return visitSETCCE(N);
+ case ISD::SETCCCARRY: return visitSETCCCARRY(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
@@ -1958,7 +1960,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
+ DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
if (SDValue Combined = visitADDLike(N0, N1, N))
@@ -1970,6 +1972,44 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue();
}
+static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
+ bool Masked = false;
+
+ // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
+ while (true) {
+ if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
+ V = V.getOperand(0);
+ continue;
+ }
+
+ if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
+ Masked = true;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ break;
+ }
+
+ // If this is not a carry, return.
+ if (V.getResNo() != 1)
+ return SDValue();
+
+ if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
+ V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
+ return SDValue();
+
+ // If the result is masked, then no matter what kind of bool it is we can
+ // return. If it isn't, then we need to make sure the bool type is either 0 or
+ // 1 and not other values.
+ if (Masked ||
+ TLI.getBooleanContents(V.getValueType()) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ return V;
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
@@ -2017,6 +2057,13 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
+ // (add X, Carry) -> (addcarry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), N0,
+ DAG.getConstant(0, DL, VT), Carry);
+
return SDValue();
}
@@ -2090,6 +2137,8 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
+ auto VT = N0.getValueType();
+
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
@@ -2100,6 +2149,12 @@ SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
N1.getOperand(2));
}
+ // (uaddo X, Carry) -> (addcarry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ DAG.getConstant(0, SDLoc(N), VT), Carry);
+
return SDValue();
}
@@ -2167,6 +2222,41 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
+ /**
+ * When one of the addcarry argument is itself a carry, we may be facing
+ * a diamond carry propagation. In which case we try to transform the DAG
+ * to ensure linear carry propagation if that is possible.
+ *
+ * We are trying to get:
+ * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ */
+ if (auto Y = getAsCarry(TLI, N1)) {
+ /**
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (addcarry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (addcarry X, *, *)
+ */
+ if (Y.getOpcode() == ISD::UADDO &&
+ CarryIn.getResNo() == 1 &&
+ CarryIn.getOpcode() == ISD::ADDCARRY &&
+ isNullConstant(CarryIn.getOperand(1)) &&
+ CarryIn.getOperand(0) == Y.getValue(0)) {
+ auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
+ Y.getOperand(0), Y.getOperand(1),
+ CarryIn.getOperand(2));
+ AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ DAG.getConstant(0, SDLoc(N), N0.getValueType()),
+ NewY.getValue(1));
+ }
+ }
+
return SDValue();
}
@@ -6754,6 +6844,19 @@ SDValue DAGCombiner::visitSETCCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (isNullConstant(Carry))
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
@@ -7124,12 +7227,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
}
}
@@ -7185,10 +7287,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
- CombineTo(N, And);
- CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ return CombineTo(N, And); // Return N so it doesn't get rechecked!
}
}
}
@@ -7427,12 +7528,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::ZERO_EXTEND);
- CombineTo(N, ExtLoad);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
}
}
@@ -7482,11 +7580,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
- CombineTo(N, And);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
- ISD::ZERO_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return CombineTo(N, And); // Return N so it doesn't get rechecked!
}
}
}
@@ -12777,10 +12873,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
// If we have load/store pair instructions and we only have two values,
- // don't bother.
+ // don't bother merging.
unsigned RequiredAlignment;
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- St->getAlignment() >= RequiredAlignment) {
+ StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
continue;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 92b0d2ae4015c..0d5e07ded25c4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2875,6 +2875,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
+ case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
@@ -3009,14 +3010,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
- // Lower with SETCCE if the target supports it.
+ // Lower with SETCCE or SETCCCARRY if the target supports it.
+ EVT HiVT = LHSHi.getValueType();
+ EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
+ bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
+
// FIXME: Make all targets support this, then remove the other lowering.
- if (TLI.getOperationAction(
- ISD::SETCCE,
- TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) ==
- TargetLowering::Custom) {
- // SETCCE can detect < and >= directly. For > and <=, flip operands and
- // condition code.
+ if (HasSETCCCARRY ||
+ TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) {
+ // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip
+ // operands and condition code.
bool FlipOperands = false;
switch (CCCode) {
case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
@@ -3030,27 +3033,28 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
std::swap(LHSHi, RHSHi);
}
// Perform a wide subtraction, feeding the carry from the low part into
- // SETCCE. The SETCCE operation is essentially looking at the high part of
- // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or
- // positive iff LHS >= RHS.
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
- SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo);
- SDValue Res =
- DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()),
- LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially
+ // looking at the high part of the result of LHS - RHS. It is negative
+ // iff LHS < RHS. It is zero or positive iff LHS >= RHS.
+ EVT LoVT = LHSLo.getValueType();
+ SDVTList VTList = DAG.getVTList(
+ LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue);
+ SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl,
+ VTList, LHSLo, RHSLo);
+ SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl,
+ getSetCCResultType(HiVT), LHSHi, RHSHi,
+ LowCmp.getValue(1), DAG.getCondCode(CCCode));
NewLHS = Res;
NewRHS = SDValue();
return;
}
- NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETEQ, false,
- DagCombineInfo, dl);
+ NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ,
+ false, DagCombineInfo, dl);
if (!NewLHS.getNode())
- NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETEQ);
- NewLHS = DAG.getSelect(dl, LoCmp.getValueType(),
- NewLHS, LoCmp, HiCmp);
+ NewLHS =
+ DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp);
NewRHS = SDValue();
}
@@ -3103,8 +3107,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
- DAG.getCondCode(CCCode)), 0);
+ return SDValue(
+ DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
@@ -3125,6 +3129,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
LowCmp.getValue(1), Cond);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+ // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
+ SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4c3b514856b78..8e999188d8e10 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -381,6 +381,7 @@ private:
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCE(SDNode *N);
+ SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4f4025d8ae6ad..579112c9bfc84 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -226,6 +226,7 @@ private:
void UnscheduleNodeBottomUp(SUnit*);
void RestoreHazardCheckerBottomUp();
void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *TryUnfoldSU(SUnit *);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
@@ -780,7 +781,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
-/// unscheduled, incrcease the succ left count of its predecessors. Remove
+/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
@@ -934,6 +935,146 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
return false;
}
+/// TryUnfold - Attempt to unfold
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ // Use while over if to ease fall through.
+ SmallVector<SDNode *, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ // unfolding an x86 DEC64m operation results in store, dec, load which
+ // can't be handled here so quit
+ if (NewNodes.size() == 3)
+ return nullptr;
+
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location and producing the same type of value
+ // but it has different alignment or volatileness.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ // If LoadSU has already been scheduled, we should clone it but
+ // this would negate the benefit to unfolding so just return SU.
+ if (LoadSU->isScheduled)
+ return SU;
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+
+ // Now that we are committed to unfolding replace DAG Uses.
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (const SDep &Pred : ChainPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : LoadPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : NodePreds) {
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (SDep D : NodeSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
+ !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (SDep D : ChainSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0)
+ NewSU->isAvailable = true;
+
+ return NewSU;
+}
+
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
@@ -959,135 +1100,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return nullptr;
}
+ // If possible unfold instruction.
if (TryUnfold) {
- SmallVector<SDNode*, 2> NewNodes;
- if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ SUnit *UnfoldSU = TryUnfoldSU(SU);
+ if (!UnfoldSU)
return nullptr;
-
- // unfolding an x86 DEC64m operation results in store, dec, load which
- // can't be handled here so quit
- if (NewNodes.size() == 3)
- return nullptr;
-
- DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
- assert(NewNodes.size() == 2 && "Expected a load folding node!");
-
- N = NewNodes[1];
- SDNode *LoadNode = NewNodes[0];
- unsigned NumVals = N->getNumValues();
- unsigned OldNumVals = SU->getNode()->getNumValues();
- for (unsigned i = 0; i != NumVals; ++i)
- DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
- DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
- SDValue(LoadNode, 1));
-
- // LoadNode may already exist. This can happen when there is another
- // load from the same location and producing the same type of value
- // but it has different alignment or volatileness.
- bool isNewLoad = true;
- SUnit *LoadSU;
- if (LoadNode->getNodeId() != -1) {
- LoadSU = &SUnits[LoadNode->getNodeId()];
- isNewLoad = false;
- } else {
- LoadSU = CreateNewSUnit(LoadNode);
- LoadNode->setNodeId(LoadSU->NodeNum);
-
- InitNumRegDefsLeft(LoadSU);
- computeLatency(LoadSU);
- }
-
- SUnit *NewSU = CreateNewSUnit(N);
- assert(N->getNodeId() == -1 && "Node already inserted!");
- N->setNodeId(NewSU->NodeNum);
-
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
- if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- NewSU->isTwoAddress = true;
- break;
- }
- }
- if (MCID.isCommutable())
- NewSU->isCommutable = true;
-
- InitNumRegDefsLeft(NewSU);
- computeLatency(NewSU);
-
- // Record all the edges to and from the old SU, by category.
- SmallVector<SDep, 4> ChainPreds;
- SmallVector<SDep, 4> ChainSuccs;
- SmallVector<SDep, 4> LoadPreds;
- SmallVector<SDep, 4> NodePreds;
- SmallVector<SDep, 4> NodeSuccs;
- for (SDep &Pred : SU->Preds) {
- if (Pred.isCtrl())
- ChainPreds.push_back(Pred);
- else if (isOperandOf(Pred.getSUnit(), LoadNode))
- LoadPreds.push_back(Pred);
- else
- NodePreds.push_back(Pred);
- }
- for (SDep &Succ : SU->Succs) {
- if (Succ.isCtrl())
- ChainSuccs.push_back(Succ);
- else
- NodeSuccs.push_back(Succ);
- }
-
- // Now assign edges to the newly-created nodes.
- for (const SDep &Pred : ChainPreds) {
- RemovePred(SU, Pred);
- if (isNewLoad)
- AddPred(LoadSU, Pred);
- }
- for (const SDep &Pred : LoadPreds) {
- RemovePred(SU, Pred);
- if (isNewLoad)
- AddPred(LoadSU, Pred);
- }
- for (const SDep &Pred : NodePreds) {
- RemovePred(SU, Pred);
- AddPred(NewSU, Pred);
- }
- for (SDep D : NodeSuccs) {
- SUnit *SuccDep = D.getSUnit();
- D.setSUnit(SU);
- RemovePred(SuccDep, D);
- D.setSUnit(NewSU);
- AddPred(SuccDep, D);
- // Balance register pressure.
- if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
- && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
- --NewSU->NumRegDefsLeft;
- }
- for (SDep D : ChainSuccs) {
- SUnit *SuccDep = D.getSUnit();
- D.setSUnit(SU);
- RemovePred(SuccDep, D);
- if (isNewLoad) {
- D.setSUnit(LoadSU);
- AddPred(SuccDep, D);
- }
- }
-
- // Add a data dependency to reflect that NewSU reads the value defined
- // by LoadSU.
- SDep D(LoadSU, SDep::Data, 0);
- D.setLatency(LoadSU->Latency);
- AddPred(NewSU, D);
-
- if (isNewLoad)
- AvailableQueue->addNode(LoadSU);
- AvailableQueue->addNode(NewSU);
-
- ++NumUnfolds;
-
- if (NewSU->NumSuccsLeft == 0) {
- NewSU->isAvailable = true;
- return NewSU;
- }
- SU = NewSU;
+ SU = UnfoldSU;
+ N = SU->getNode();
+ // If this can be scheduled don't bother duplicating and just return
+ if (SU->NumSuccsLeft == 0)
+ return SU;
}
DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c37d7080f2c5a..0dbd9e846aa60 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -214,6 +214,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
case ISD::SETCCE: return "setcce";
+ case ISD::SETCCCARRY: return "setcccarry";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 0def5ae6d0d0d..900c0318b179a 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -842,9 +842,10 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
initActions();
// Perform these initializations only once.
- MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
- MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
- = MaxStoresPerMemmoveOptSize = 4;
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
+ MaxLoadsPerMemcmp = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
+ MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
HasMultipleConditionRegisters = false;
@@ -926,6 +927,7 @@ void TargetLoweringBase::initActions() {
// ADDCARRY operations default to expand
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
+ setOperationAction(ISD::SETCCCARRY, VT, Expand);
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 83348058eca9b..72d5e995ac225 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -261,9 +261,9 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
-TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
- AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
+ AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false),
DisableVerify(false), EnableTailMerge(true),
RequireCodeGenSCCOrder(false) {
@@ -282,9 +282,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
- TM->Options.PrintMachineCode = true;
+ TM.Options.PrintMachineCode = true;
- if (TM->Options.EnableIPRA)
+ if (TM.Options.EnableIPRA)
setRequiresCodeGenSCCOrder();
}
@@ -310,7 +310,7 @@ void TargetPassConfig::insertPass(AnalysisID TargetPassID,
///
/// Targets may override this to extend TargetPassConfig.
TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new TargetPassConfig(this, PM);
+ return new TargetPassConfig(*this, PM);
}
TargetPassConfig::TargetPassConfig()
@@ -430,7 +430,12 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) {
}
void TargetPassConfig::addVerifyPass(const std::string &Banner) {
- if (VerifyMachineCode)
+ bool Verify = VerifyMachineCode;
+#ifdef EXPENSIVE_CHECKS
+ if (VerifyMachineCode == cl::BOU_UNSET)
+ Verify = TM->isMachineVerifierClean();
+#endif
+ if (Verify)
PM->add(createMachineVerifierPass(Banner));
}