diff options
Diffstat (limited to 'lib/CodeGen')
22 files changed, 1289 insertions, 277 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 43b245c66400d..5abf50e5bd10c 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -165,7 +165,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; - if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg))) + if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d72cf59229874..e61e22abe82a7 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -949,6 +949,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { MCConstantExpr::create(FrameOffset, OutContext)); } +static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, + MachineModuleInfo *MMI) { + if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) + return true; + + // We might emit an EH table that uses function begin and end labels even if + // we don't have any landingpads. + if (!MF.getFunction()->hasPersonalityFn()) + return false; + return !isNoOpWithoutInvoke( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { @@ -1076,8 +1089,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() || - MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { + if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) || + MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -1402,8 +1415,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() || - MF.hasEHFunclets() || NeedsLocalForSize) { + if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 0a4a7a06cb2e7..e14d5be1177a6 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) { + if (SawPotentiallyThrowing && !IsSJLJ) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2b5863aa58009..55a27e2fb79e5 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_library(LLVMCodeGen LivePhysRegs.cpp LiveRangeCalc.cpp LiveRangeEdit.cpp + LiveRangeShrink.cpp LiveRegMatrix.cpp LiveRegUnits.cpp LiveStackAnalysis.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 2a2715beaadca..4d30c6574b121 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -43,6 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); + initializeLiveRangeShrinkPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 4e85708efafc1..568b278dd47cb 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -24,12 +24,13 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -60,6 +61,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Utils/ValueMapper.h" + using namespace llvm; using namespace llvm::PatternMatch; @@ -84,6 +86,12 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, + "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); + static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -144,6 +152,11 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); +static cl::opt<unsigned> MemCmpNumLoadsPerBlock( + "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), + cl::desc("The number of loads per basic block for inline expansion of " + "memcmp that is only being compared against zero.")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -1629,6 +1642,593 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return true; } +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. +class MemCmpExpansion { + struct ResultBlock { + BasicBlock *BB; + PHINode *PhiSrc1; + PHINode *PhiSrc2; + ResultBlock(); + }; + + CallInst *CI; + ResultBlock ResBlock; + unsigned MaxLoadSize; + unsigned NumBlocks; + unsigned NumBlocksNonOneByte; + unsigned NumLoadsPerBlock; + std::vector<BasicBlock *> LoadCmpBlocks; + BasicBlock *EndBlock; + PHINode *PhiRes; + bool IsUsedForZeroCmp; + int calculateNumBlocks(unsigned Size); + void createLoadCmpBlocks(); + void createResultBlock(); + void setupResultBlockPHINodes(); + void setupEndBlockPHINodes(); + void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex, + bool IsLittleEndian); + void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed); + void emitLoadCompareByteBlock(unsigned Index, int GEPIndex); + void emitMemCmpResultBlock(bool IsLittleEndian); + Value *getMemCmpExpansionZeroCase(unsigned Size, bool IsLittleEndian); + unsigned getLoadSize(unsigned Size); + unsigned getNumLoads(unsigned Size); + +public: + MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize, + unsigned NumLoadsPerBlock); + Value *getMemCmpExpansion(bool IsLittleEndian); +}; + +MemCmpExpansion::ResultBlock::ResultBlock() + : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {} + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize, + unsigned NumLoadsPerBlock) + : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(NumLoadsPerBlock) { + + IRBuilder<> Builder(CI->getContext()); + + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + uint64_t Size = SizeCast->getZExtValue(); + + // Calculate how many load compare blocks are required for an expansion of + // given Size. + NumBlocks = calculateNumBlocks(Size); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) + setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); +} + +void MemCmpExpansion::createLoadCmpBlocks() { + for (unsigned i = 0; i < NumBlocks; i++) { + BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", + EndBlock->getParent(), EndBlock); + LoadCmpBlocks.push_back(BB); + } +} + +void MemCmpExpansion::createResultBlock() { + ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", + EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp paramters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { + IRBuilder<> Builder(CI->getContext()); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); + // Cast source to LoadSizeType* + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); + + if (Index < (LoadCmpBlocks.size() - 1)) { + // Early exit branch if difference found to EndBlock, otherwise continue to + // next LoadCmpBlock + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BranchInst *CmpBr = + BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); + Builder.Insert(CmpBr); + } else { + // The last block has an unconditional branch to EndBlock + BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); + } +} + +unsigned MemCmpExpansion::getNumLoads(unsigned Size) { + return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); +} + +unsigned MemCmpExpansion::getLoadSize(unsigned Size) { + return MinAlign(PowerOf2Floor(Size), MaxLoadSize); +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( + unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { + + IRBuilder<> Builder(CI->getContext()); + + std::vector<Value *> XorList, OrList; + Value *Diff; + + unsigned RemainingBytes = Size - NumBytesProcessed; + unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); + unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + + for (unsigned i = 0; i < NumLoads; ++i) { + unsigned LoadSize = getLoadSize(RemainingBytes); + unsigned GEPIndex = NumBytesProcessed / LoadSize; + NumBytesProcessed += LoadSize; + RemainingBytes -= LoadSize; + + Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType* + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + // Load LoadSizeType from the base address + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + } + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); + XorList.push_back(Diff); + } + + auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { + std::vector<Value *> OutList; + for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { + Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); + OutList.push_back(Or); + } + if (InList.size() % 2 != 0) + OutList.push_back(InList.back()); + return OutList; + }; + + // Pair wise OR the XOR results + OrList = pairWiseOr(XorList); + + // Pair wise OR the OR results until one result left + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0], + ConstantInt::get(Diff->getType(), 0)); + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[Index + 1]; + // Early exit branch if difference found to ResultBlock, otherwise continue to + // next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes) + if (Index == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + } +} + +// This function creates the IR intructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to the either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add it to the result phi node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, + int GEPIndex, bool IsLittleEndian) { + if (LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); + return; + } + + IRBuilder<> Builder(CI->getContext()); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + // Cast source to LoadSizeType* + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + // Load LoadSizeType from the base address + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (IsLittleEndian) { + Function *F = LoadCmpBlocks[Index]->getParent(); + + Function *Bswap = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + } + + // Add the loaded values to the phi nodes for calculating memcmp result only + // if result is not used in a zero equality. + if (!IsUsedForZeroCmp) { + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); + } + + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[Index + 1]; + // Early exit branch if difference found to ResultBlock, otherwise continue to + // next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes) + if (Index == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) { + IRBuilder<> Builder(CI->getContext()); + + // Special case: if memcmp result is used in a zero equality, result does not + // need to be calculated and can simply return 1. + if (IsUsedForZeroCmp) { + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); + PhiRes->addIncoming(Res, ResBlock.BB); + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + return; + } + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, + ResBlock.PhiSrc2); + + Value *Res = + Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), + ConstantInt::get(Builder.getInt32Ty(), 1)); + + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + PhiRes->addIncoming(Res, ResBlock.BB); +} + +int MemCmpExpansion::calculateNumBlocks(unsigned Size) { + int NumBlocks = 0; + bool haveOneByteLoad = false; + unsigned RemainingSize = Size; + unsigned LoadSize = MaxLoadSize; + while (RemainingSize) { + if (LoadSize == 1) + haveOneByteLoad = true; + NumBlocks += RemainingSize / LoadSize; + RemainingSize = RemainingSize % LoadSize; + LoadSize = LoadSize / 2; + } + NumBlocksNonOneByte = haveOneByteLoad ? (NumBlocks - 1) : NumBlocks; + + if (IsUsedForZeroCmp) + NumBlocks = NumBlocks / NumLoadsPerBlock + + (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0); + + return NumBlocks; +} + +void MemCmpExpansion::setupResultBlockPHINodes() { + IRBuilder<> Builder(CI->getContext()); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Builder.SetInsertPoint(ResBlock.BB); + ResBlock.PhiSrc1 = + Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); + ResBlock.PhiSrc2 = + Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { + IRBuilder<> Builder(CI->getContext()); + + Builder.SetInsertPoint(&EndBlock->front()); + PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size, + bool IsLittleEndian) { + unsigned NumBytesProcessed = 0; + // This loop populates each of the LoadCmpBlocks with IR sequence to handle + // multiple loads per block + for (unsigned i = 0; i < NumBlocks; ++i) { + emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); + } + + emitMemCmpResultBlock(IsLittleEndian); + return PhiRes; +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion(bool IsLittleEndian) { + + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + uint64_t Size = SizeCast->getZExtValue(); + + int LoadSize = MaxLoadSize; + int NumBytesToBeProcessed = Size; + + if (IsUsedForZeroCmp) { + return getMemCmpExpansionZeroCase(Size, IsLittleEndian); + } + + unsigned Index = 0; + // This loop calls emitLoadCompareBlock for comparing SizeVal bytes of the two + // memcmp source. It starts with loading using the maximum load size set by + // the target. It processes any remaining bytes using a load size which is the + // next smallest power of 2. + while (NumBytesToBeProcessed) { + // Calculate how many blocks we can create with the current load size + int NumBlocks = NumBytesToBeProcessed / LoadSize; + int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; + NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; + + // For each NumBlocks, populate the instruction sequence for loading and + // comparing LoadSize bytes + while (NumBlocks--) { + emitLoadCompareBlock(Index, LoadSize, GEPIndex, IsLittleEndian); + Index++; + GEPIndex++; + } + // Get the next LoadSize to use + LoadSize = LoadSize / 2; + } + + emitMemCmpResultBlock(IsLittleEndian); + return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced wtih new IR instruction sequence. +/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +/// %0 = bitcast i32* %buffer2 to i8* +/// %1 = bitcast i32* %buffer1 to i8* +/// %2 = bitcast i8* %1 to i64* +/// %3 = bitcast i8* %0 to i64* +/// %4 = load i64, i64* %2 +/// %5 = load i64, i64* %3 +/// %6 = call i64 @llvm.bswap.i64(i64 %4) +/// %7 = call i64 @llvm.bswap.i64(i64 %5) +/// %8 = sub i64 %6, %7 +/// %9 = icmp ne i64 %8, 0 +/// br i1 %9, label %res_block, label %loadbb1 +/// res_block: ; preds = %loadbb2, +/// %loadbb1, %loadbb +/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +/// %10 = icmp ult i64 %phi.src1, %phi.src2 +/// %11 = select i1 %10, i32 -1, i32 1 +/// br label %endblock +/// loadbb1: ; preds = %loadbb +/// %12 = bitcast i32* %buffer2 to i8* +/// %13 = bitcast i32* %buffer1 to i8* +/// %14 = bitcast i8* %13 to i32* +/// %15 = bitcast i8* %12 to i32* +/// %16 = getelementptr i32, i32* %14, i32 2 +/// %17 = getelementptr i32, i32* %15, i32 2 +/// %18 = load i32, i32* %16 +/// %19 = load i32, i32* %17 +/// %20 = call i32 @llvm.bswap.i32(i32 %18) +/// %21 = call i32 @llvm.bswap.i32(i32 %19) +/// %22 = zext i32 %20 to i64 +/// %23 = zext i32 %21 to i64 +/// %24 = sub i64 %22, %23 +/// %25 = icmp ne i64 %24, 0 +/// br i1 %25, label %res_block, label %loadbb2 +/// loadbb2: ; preds = %loadbb1 +/// %26 = bitcast i32* %buffer2 to i8* +/// %27 = bitcast i32* %buffer1 to i8* +/// %28 = bitcast i8* %27 to i16* +/// %29 = bitcast i8* %26 to i16* +/// %30 = getelementptr i16, i16* %28, i16 6 +/// %31 = getelementptr i16, i16* %29, i16 6 +/// %32 = load i16, i16* %30 +/// %33 = load i16, i16* %31 +/// %34 = call i16 @llvm.bswap.i16(i16 %32) +/// %35 = call i16 @llvm.bswap.i16(i16 %33) +/// %36 = zext i16 %34 to i64 +/// %37 = zext i16 %35 to i64 +/// %38 = sub i64 %36, %37 +/// %39 = icmp ne i64 %38, 0 +/// br i1 %39, label %res_block, label %loadbb3 +/// loadbb3: ; preds = %loadbb2 +/// %40 = bitcast i32* %buffer2 to i8* +/// %41 = bitcast i32* %buffer1 to i8* +/// %42 = getelementptr i8, i8* %41, i8 14 +/// %43 = getelementptr i8, i8* %40, i8 14 +/// %44 = load i8, i8* %42 +/// %45 = load i8, i8* %43 +/// %46 = zext i8 %44 to i32 +/// %47 = zext i8 %45 to i32 +/// %48 = sub i32 %46, %47 +/// br label %endblock +/// endblock: ; preds = %res_block, +/// %loadbb3 +/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +/// ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, + const TargetLowering *TLI, const DataLayout *DL) { + NumMemCmpCalls++; + IRBuilder<> Builder(CI->getContext()); + + // TTI call to check if target would like to expand memcmp and get the + // MaxLoadSize + unsigned MaxLoadSize; + if (!TTI->expandMemCmp(CI, MaxLoadSize)) + return false; + + // Early exit from expansion if -Oz + if (CI->getParent()->getParent()->optForMinSize()) { + return false; + } + + // Early exit from expansion if size is not a constant + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeCast) { + NumMemCmpNotConstant++; + return false; + } + + // Early exit from expansion if size greater than max bytes to load + uint64_t SizeVal = SizeCast->getZExtValue(); + + unsigned NumLoads = 0; + unsigned RemainingSize = SizeVal; + unsigned LoadSize = MaxLoadSize; + while (RemainingSize) { + NumLoads += RemainingSize / LoadSize; + RemainingSize = RemainingSize % LoadSize; + LoadSize = LoadSize / 2; + } + + if (NumLoads > + TLI->getMaxExpandSizeMemcmp(CI->getParent()->getParent()->optForSize())) { + NumMemCmpGreaterThanMax++; + return false; + } + + NumMemCmpInlined++; + + // MemCmpHelper object, creates and sets up basic blocks required for + // expanding memcmp with size SizeVal + unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; + MemCmpExpansion MemCmpHelper(CI, MaxLoadSize, NumLoadsPerBlock); + + Value *Res = MemCmpHelper.getMemCmpExpansion(DL->isLittleEndian()); + + // Replace call with result of expansion and erarse call. + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + + return true; +} + bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { BasicBlock *BB = CI->getParent(); @@ -1780,6 +2380,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { CI->eraseFromParent(); return true; } + + LibFunc Func; + if (TLInfo->getLibFunc(*CI->getCalledFunction(), Func) && + Func == LibFunc_memcmp) { + if (expandMemCmp(CI, TTI, TLI, DL)) { + ModifiedDT = true; + return true; + } + } return false; } @@ -4927,6 +5536,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { return true; } + namespace { /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index b2d6652b075e7..a3cf2846d2f5d 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -74,7 +74,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; - if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg))) + if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp index c2a568e4b4521..c5d0999fe4388 100644 --- a/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -98,12 +98,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { // Create the localized instruction. MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); LocalizedInstrs.insert(LocalizedMI); - // Move it at the right place. - MachineInstr &MIUse = *MOUse.getParent(); - if (MIUse.getParent() == InsertMBB) - InsertMBB->insert(MIUse, LocalizedMI); - else - InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI); + // Don't try to be smart for the insertion point. + // There is no guarantee that the first seen use is the first + // use in the block. + InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI); // Set a new register for the definition. unsigned NewReg = diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 24e289dd4f1b0..444416a77008c 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -607,8 +607,20 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr( .addMBB(HandlerMBB) .addImm(MI->getOpcode()); - for (auto &MO : MI->uses()) - MIB.add(MO); + for (auto &MO : MI->uses()) { + if (MO.isReg()) { + MachineOperand NewMO = MO; + if (MO.isUse()) { + NewMO.setIsKill(false); + } else { + assert(MO.isDef() && "Expected def or use"); + NewMO.setIsDead(false); + } + MIB.add(NewMO); + } else { + MIB.add(MO); + } + } MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp new file mode 100644 index 0000000000000..552f4b5393fef --- /dev/null +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -0,0 +1,231 @@ +//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +///===---------------------------------------------------------------------===// +/// +/// \file +/// This pass moves instructions close to the definition of its operands to +/// shrink live range of the def instruction. The code motion is limited within +/// the basic block. The moved instruction should have 1 def, and more than one +/// uses, all of which are the only use of the def. +/// +///===---------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "lrshrink" + +STATISTIC(NumInstrsHoistedToShrinkLiveRange, + "Number of insructions hoisted to shrink live range."); + +using namespace llvm; + +namespace { +class LiveRangeShrink : public MachineFunctionPass { +public: + static char ID; + + LiveRangeShrink() : MachineFunctionPass(ID) { + initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Live Range Shrink"; } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char LiveRangeShrink::ID = 0; +char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID; + +INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false, + false) +namespace { +typedef DenseMap<MachineInstr *, unsigned> InstOrderMap; + +/// Returns \p New if it's dominated by \p Old, otherwise return \p Old. +/// \p M maintains a map from instruction to its dominating order that satisfies +/// M[A] > M[B] guarantees that A is dominated by B. +/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return +/// \p New. +MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old, + const InstOrderMap &M) { + auto NewIter = M.find(&New); + if (NewIter == M.end()) + return Old; + if (Old == nullptr) + return &New; + unsigned OrderOld = M.find(Old)->second; + unsigned OrderNew = NewIter->second; + if (OrderOld != OrderNew) + return OrderOld < OrderNew ? &New : Old; + // OrderOld == OrderNew, we need to iterate down from Old to see if it + // can reach New, if yes, New is dominated by Old. + for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew; + I = I->getNextNode()) + if (I == &New) + return &New; + return Old; +} + +/// Builds Instruction to its dominating order number map \p M by traversing +/// from instruction \p Start. +void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) { + M.clear(); + unsigned i = 0; + for (MachineInstr &I : make_range(Start, Start->getParent()->end())) + M[&I] = i++; +} +} // end anonymous namespace + +bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + InstOrderMap IOM; + // Map from register to instruction order (value of IOM) where the + // register is used last. When moving instructions up, we need to + // make sure all its defs (including dead def) will not cross its + // last use when moving up. + DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap; + + for (MachineBasicBlock &MBB : MF) { + if (MBB.empty()) + continue; + bool SawStore = false; + BuildInstOrderMap(MBB.begin(), IOM); + UseMap.clear(); + + for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) { + MachineInstr &MI = *Next; + ++Next; + if (MI.isPHI() || MI.isDebugValue()) + continue; + if (MI.mayStore()) + SawStore = true; + + unsigned CurrentOrder = IOM[&MI]; + unsigned Barrier = 0; + MachineInstr *BarrierMI = nullptr; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isDebug()) + continue; + if (MO.isUse()) + UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI); + else if (MO.isDead() && UseMap.count(MO.getReg())) + // Barrier is the last instruction where MO get used. MI should not + // be moved above Barrier. + if (Barrier < UseMap[MO.getReg()].first) { + Barrier = UseMap[MO.getReg()].first; + BarrierMI = UseMap[MO.getReg()].second; + } + } + + if (!MI.isSafeToMove(nullptr, SawStore)) { + // If MI has side effects, it should become a barrier for code motion. + // IOM is rebuild from the next instruction to prevent later + // instructions from being moved before this MI. + if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) { + BuildInstOrderMap(Next, IOM); + SawStore = false; + } + continue; + } + + const MachineOperand *DefMO = nullptr; + MachineInstr *Insert = nullptr; + + // Number of live-ranges that will be shortened. We do not count + // live-ranges that are defined by a COPY as it could be coalesced later. + unsigned NumEligibleUse = 0; + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isDead() || MO.isDebug()) + continue; + unsigned Reg = MO.getReg(); + // Do not move the instruction if it def/uses a physical register, + // unless it is a constant physical register or a noreg. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Reg || MRI.isConstantPhysReg(Reg)) + continue; + Insert = nullptr; + break; + } + if (MO.isDef()) { + // Do not move if there is more than one def. + if (DefMO) { + Insert = nullptr; + break; + } + DefMO = &MO; + } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO && + MRI.getRegClass(DefMO->getReg()) == + MRI.getRegClass(MO.getReg())) { + // The heuristic does not handle different register classes yet + // (registers of different sizes, looser/tighter constraints). This + // is because it needs more accurate model to handle register + // pressure correctly. + MachineInstr &DefInstr = *MRI.def_instr_begin(Reg); + if (!DefInstr.isCopy()) + NumEligibleUse++; + Insert = FindDominatedInstruction(DefInstr, Insert, IOM); + } else { + Insert = nullptr; + break; + } + } + + // If Barrier equals IOM[I], traverse forward to find if BarrierMI is + // after Insert, if yes, then we should not hoist. + for (MachineInstr *I = Insert; I && IOM[I] == Barrier; + I = I->getNextNode()) + if (I == BarrierMI) { + Insert = nullptr; + break; + } + // Move the instruction when # of shrunk live range > 1. + if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) { + MachineBasicBlock::iterator I = std::next(Insert->getIterator()); + // Skip all the PHI and debug instructions. + while (I != MBB.end() && (I->isPHI() || I->isDebugValue())) + I = std::next(I); + if (I == MI.getIterator()) + continue; + + // Update the dominator order to be the same as the insertion point. + // We do this to maintain a non-decreasing order without need to update + // all instruction orders after the insertion point. + unsigned NewOrder = IOM[&*I]; + IOM[&MI] = NewOrder; + NumInstrsHoistedToShrinkLiveRange++; + + // Find MI's debug value following MI. + MachineBasicBlock::iterator EndIter = std::next(MI.getIterator()); + if (MI.getOperand(0).isReg()) + for (; EndIter != MBB.end() && EndIter->isDebugValue() && + EndIter->getOperand(0).isReg() && + EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg(); + ++EndIter, ++Next) + IOM[&*EndIter] = NewOrder; + MBB.splice(I, &MBB, MI.getIterator(), EndIter); + } + } + } + return false; +} diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index bd04acd049dba..ff12297e3fc67 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -332,8 +332,6 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); - if (YamlMF.NoVRegs) - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); if (YamlMF.Legalized) MF.getProperties().set(MachineFunctionProperties::Property::Legalized); if (YamlMF.RegBankSelected) diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 6f6a67d81b0fe..293fc7358b8ed 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -183,8 +183,6 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); - YamlMF.NoVRegs = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::NoVRegs); YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); YamlMF.RegBankSelected = MF.getProperties().hasProperty( diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 06112723497b0..590acc01008a6 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -350,6 +350,13 @@ void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { LiveIns.erase(I); } +MachineBasicBlock::livein_iterator +MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) { + // Get non-const version of iterator. + LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin()); + return LiveIns.erase(LI); +} + bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { livein_iterator I = find_if( LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index d665201a5d17c..306b75dbbae7e 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1,4 +1,4 @@ -//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -11,21 +11,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -35,9 +48,13 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -45,6 +62,14 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <utility> + using namespace llvm; static cl::opt<bool> PrintWholeRegMask( @@ -256,7 +281,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_GlobalAddress: return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); case MachineOperand::MO_ExternalSymbol: - return !strcmp(getSymbolName(), Other.getSymbolName()) && + return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress() && @@ -723,9 +748,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), - AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), - debugLoc(std::move(dl)) { + : MCID(&tid), debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -742,9 +765,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), - MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { + : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), + debugLoc(MI.getDebugLoc()) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -1633,8 +1655,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; @@ -1667,7 +1689,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { return true; // Check if any of our memory operands are ordered. - return any_of(memoperands(), [](const MachineMemOperand *MMO) { + return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) { return !MMO->isUnordered(); }); } @@ -1841,7 +1863,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, return; // Print the rest of the operands. - bool OmittedAnyCallClobbers = false; bool FirstOp = true; unsigned AsmDescOp = ~0u; unsigned AsmOpCount = 0; @@ -1878,31 +1899,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegs.push_back(MO.getReg()); - // Omit call-clobbered registers which aren't used anywhere. This makes - // call instructions much less noisy on targets where calls clobber lots - // of registers. Don't rely on MO.isDead() because we may be called before - // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MRI && isCall() && - MO.isReg() && MO.isImplicit() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (MRI->use_empty(Reg)) { - bool HasAliasLive = false; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - unsigned AliasReg = *AI; - if (!MRI->use_empty(AliasReg)) { - HasAliasLive = true; - break; - } - } - if (!HasAliasLive) { - OmittedAnyCallClobbers = true; - continue; - } - } - } - } - if (FirstOp) FirstOp = false; else OS << ","; OS << " "; if (i < getDesc().NumOperands) { @@ -1984,12 +1980,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, MO.print(OS, MST, TRI); } - // Briefly indicate whether any call clobbers were omitted. - if (OmittedAnyCallClobbers) { - if (!FirstOp) OS << ","; - OS << " ..."; - } - bool HaveSemi = false; const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { @@ -2255,8 +2245,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // If there are no uses, including partial uses, the def is dead. - if (none_of(UsedRegs, - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + if (llvm::none_of(UsedRegs, + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 6cf751d34e268..c1b72430e6053 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -7,27 +7,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionInitializer.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <memory> +#include <utility> +#include <vector> + using namespace llvm; using namespace llvm::dwarf; @@ -37,14 +44,16 @@ INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", char MachineModuleInfo::ID = 0; // Out of line virtual method. -MachineModuleInfoImpl::~MachineModuleInfoImpl() {} +MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; namespace llvm { + class MMIAddrLabelMapCallbackPtr final : CallbackVH { - MMIAddrLabelMap *Map; + MMIAddrLabelMap *Map = nullptr; + public: - MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {} + MMIAddrLabelMapCallbackPtr() = default; + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); @@ -75,11 +84,12 @@ class MMIAddrLabelMap { /// This is a per-function list of symbols whose corresponding BasicBlock got /// deleted. These symbols need to be emitted at some point in the file, so /// AsmPrinter emits them after the function body. - DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>> DeletedAddrLabelsNeedingEmission; -public: +public: MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { assert(DeletedAddrLabelsNeedingEmission.empty() && "Some labels for deleted blocks never got emitted"); @@ -93,7 +103,8 @@ public: void UpdateForDeletedBlock(BasicBlock *BB); void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); }; -} + +} // end namespace llvm ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { assert(BB->hasAddressTaken() && @@ -119,7 +130,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { /// If we have any deleted symbols for F, return them. void MMIAddrLabelMap:: takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { - DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I = + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I = DeletedAddrLabelsNeedingEmission.find(F); // If there are no entries for the function, just return. @@ -130,7 +141,6 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { DeletedAddrLabelsNeedingEmission.erase(I); } - void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // If the block got deleted, there is no need for the symbol. If the symbol // was already emitted, we can just forget about it, otherwise we need to @@ -177,7 +187,6 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { OldEntry.Symbols.end()); } - void MMIAddrLabelMapCallbackPtr::deleted() { Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); } @@ -186,9 +195,6 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); } - -//===----------------------------------------------------------------------===// - MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) : ImmutablePass(ID), TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), @@ -196,11 +202,9 @@ MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } -MachineModuleInfo::~MachineModuleInfo() { -} +MachineModuleInfo::~MachineModuleInfo() = default; bool MachineModuleInfo::doInitialization(Module &M) { - ObjFileMMI = nullptr; CurCallSite = 0; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; @@ -211,7 +215,6 @@ bool MachineModuleInfo::doInitialization(Module &M) { } bool MachineModuleInfo::doFinalization(Module &M) { - Personalities.clear(); delete AddrLabelSymbols; @@ -290,10 +293,12 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { } namespace { + /// This pass frees the MachineFunction object associated with a Function. class FreeMachineFunction : public FunctionPass { public: static char ID; + FreeMachineFunction() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -311,14 +316,14 @@ public: return "Free MachineFunction"; } }; -char FreeMachineFunction::ID; + } // end anonymous namespace -namespace llvm { -FunctionPass *createFreeMachineFunctionPass() { +char FreeMachineFunction::ID; + +FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } -} // end namespace llvm //===- MMI building helpers -----------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ab36bc1417aee..fb51a4eb14212 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -280,6 +280,7 @@ namespace { SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSETCCE(SDNode *N); + SDValue visitSETCCCARRY(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -1457,6 +1458,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SETCCE: return visitSETCCE(N); + case ISD::SETCCCARRY: return visitSETCCCARRY(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1958,7 +1960,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) + DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); if (SDValue Combined = visitADDLike(N0, N1, N)) @@ -1970,6 +1972,44 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(); } +static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { + bool Masked = false; + + // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. + while (true) { + if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { + V = V.getOperand(0); + continue; + } + + if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { + Masked = true; + V = V.getOperand(0); + continue; + } + + break; + } + + // If this is not a carry, return. + if (V.getResNo() != 1) + return SDValue(); + + if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY && + V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) + return SDValue(); + + // If the result is masked, then no matter what kind of bool it is we can + // return. If it isn't, then we need to make sure the bool type is either 0 or + // 1 and not other values. + if (Masked || + TLI.getBooleanContents(V.getValueType()) == + TargetLoweringBase::ZeroOrOneBooleanContent) + return V; + + return SDValue(); +} + SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); @@ -2017,6 +2057,13 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), N0, N1.getOperand(0), N1.getOperand(2)); + // (add X, Carry) -> (addcarry X, 0, Carry) + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) + if (SDValue Carry = getAsCarry(TLI, N1)) + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), N0, + DAG.getConstant(0, DL, VT), Carry); + return SDValue(); } @@ -2090,6 +2137,8 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { } SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { + auto VT = N0.getValueType(); + // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) // If Y + 1 cannot overflow. if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { @@ -2100,6 +2149,12 @@ SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { N1.getOperand(2)); } + // (uaddo X, Carry) -> (addcarry X, 0, Carry) + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) + if (SDValue Carry = getAsCarry(TLI, N1)) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), VT), Carry); + return SDValue(); } @@ -2167,6 +2222,41 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); + /** + * When one of the addcarry argument is itself a carry, we may be facing + * a diamond carry propagation. In which case we try to transform the DAG + * to ensure linear carry propagation if that is possible. + * + * We are trying to get: + * (addcarry X, 0, (addcarry A, B, Z):Carry) + */ + if (auto Y = getAsCarry(TLI, N1)) { + /** + * (uaddo A, B) + * / \ + * Carry Sum + * | \ + * | (addcarry *, 0, Z) + * | / + * \ Carry + * | / + * (addcarry X, *, *) + */ + if (Y.getOpcode() == ISD::UADDO && + CarryIn.getResNo() == 1 && + CarryIn.getOpcode() == ISD::ADDCARRY && + isNullConstant(CarryIn.getOperand(1)) && + CarryIn.getOperand(0) == Y.getValue(0)) { + auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), + Y.getOperand(0), Y.getOperand(1), + CarryIn.getOperand(2)); + AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), N0.getValueType()), + NewY.getValue(1)); + } + } + return SDValue(); } @@ -6754,6 +6844,19 @@ SDValue DAGCombiner::visitSETCCE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (isNullConstant(Carry)) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext @@ -7124,12 +7227,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked! } } @@ -7185,10 +7287,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); - CombineTo(N, And); - CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + return CombineTo(N, And); // Return N so it doesn't get rechecked! } } } @@ -7427,12 +7528,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), - ISD::ZERO_EXTEND); - CombineTo(N, ExtLoad); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked! } } @@ -7482,11 +7580,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); - CombineTo(N, And); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, - ISD::ZERO_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return CombineTo(N, And); // Return N so it doesn't get rechecked! } } } @@ -12777,10 +12873,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } // If we have load/store pair instructions and we only have two values, - // don't bother. + // don't bother merging. unsigned RequiredAlignment; if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && - St->getAlignment() >= RequiredAlignment) { + StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); continue; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 92b0d2ae4015c..0d5e07ded25c4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2875,6 +2875,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; + case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -3009,14 +3010,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - // Lower with SETCCE if the target supports it. + // Lower with SETCCE or SETCCCARRY if the target supports it. + EVT HiVT = LHSHi.getValueType(); + EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT); + bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT); + // FIXME: Make all targets support this, then remove the other lowering. - if (TLI.getOperationAction( - ISD::SETCCE, - TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == - TargetLowering::Custom) { - // SETCCE can detect < and >= directly. For > and <=, flip operands and - // condition code. + if (HasSETCCCARRY || + TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) { + // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip + // operands and condition code. bool FlipOperands = false; switch (CCCode) { case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; @@ -3030,27 +3033,28 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, std::swap(LHSHi, RHSHi); } // Perform a wide subtraction, feeding the carry from the low part into - // SETCCE. The SETCCE operation is essentially looking at the high part of - // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or - // positive iff LHS >= RHS. - SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); - SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); - SDValue Res = - DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), - LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially + // looking at the high part of the result of LHS - RHS. It is negative + // iff LHS < RHS. It is zero or positive iff LHS >= RHS. + EVT LoVT = LHSLo.getValueType(); + SDVTList VTList = DAG.getVTList( + LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue); + SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl, + VTList, LHSLo, RHSLo); + SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl, + getSetCCResultType(HiVT), LHSHi, RHSHi, + LowCmp.getValue(1), DAG.getCondCode(CCCode)); NewLHS = Res; NewRHS = SDValue(); return; } - NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETEQ, false, - DagCombineInfo, dl); + NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ, + false, DagCombineInfo, dl); if (!NewLHS.getNode()) - NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), - NewLHS, LoCmp, HiCmp); + NewLHS = + DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp); NewRHS = SDValue(); } @@ -3103,8 +3107,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), 0); + return SDValue( + DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { @@ -3125,6 +3129,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { LowCmp.getValue(1), Cond); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType()); + SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4c3b514856b78..8e999188d8e10 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -381,6 +381,7 @@ private: SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); SDValue ExpandIntOp_SETCCE(SDNode *N); + SDValue ExpandIntOp_SETCCCARRY(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 4f4025d8ae6ad..579112c9bfc84 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -226,6 +226,7 @@ private: void UnscheduleNodeBottomUp(SUnit*); void RestoreHazardCheckerBottomUp(); void BacktrackBottomUp(SUnit*, SUnit*); + SUnit *TryUnfoldSU(SUnit *); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, @@ -780,7 +781,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } /// CapturePred - This does the opposite of ReleasePred. Since SU is being -/// unscheduled, incrcease the succ left count of its predecessors. Remove +/// unscheduled, increase the succ left count of its predecessors. Remove /// them from AvailableQueue if necessary. void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); @@ -934,6 +935,146 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { return false; } +/// TryUnfold - Attempt to unfold +SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { + SDNode *N = SU->getNode(); + // Use while over if to ease fall through. + SmallVector<SDNode *, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return nullptr; + + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return nullptr; + + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. + bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + // If LoadSU has already been scheduled, we should clone it but + // this would negate the benefit to unfolding so just return SU. + if (LoadSU->isScheduled) + return SU; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); + computeLatency(LoadSU); + } + + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + + // Now that we are committed to unfolding replace DAG Uses. + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPreds.push_back(Pred); + else if (isOperandOf(Pred.getSUnit(), LoadNode)) + LoadPreds.push_back(Pred); + else + NodePreds.push_back(Pred); + } + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); + else + NodeSuccs.push_back(Succ); + } + + // Now assign edges to the newly-created nodes. + for (const SDep &Pred : ChainPreds) { + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (const SDep &Pred : LoadPreds) { + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (const SDep &Pred : NodePreds) { + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (SDep D : NodeSuccs) { + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && + !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; + } + for (SDep D : ChainSuccs) { + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) + NewSU->isAvailable = true; + + return NewSU; +} + /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { @@ -959,135 +1100,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return nullptr; } + // If possible unfold instruction. if (TryUnfold) { - SmallVector<SDNode*, 2> NewNodes; - if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + SUnit *UnfoldSU = TryUnfoldSU(SU); + if (!UnfoldSU) return nullptr; - - // unfolding an x86 DEC64m operation results in store, dec, load which - // can't be handled here so quit - if (NewNodes.size() == 3) - return nullptr; - - DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); - assert(NewNodes.size() == 2 && "Expected a load folding node!"); - - N = NewNodes[1]; - SDNode *LoadNode = NewNodes[0]; - unsigned NumVals = N->getNumValues(); - unsigned OldNumVals = SU->getNode()->getNumValues(); - for (unsigned i = 0; i != NumVals; ++i) - DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); - DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), - SDValue(LoadNode, 1)); - - // LoadNode may already exist. This can happen when there is another - // load from the same location and producing the same type of value - // but it has different alignment or volatileness. - bool isNewLoad = true; - SUnit *LoadSU; - if (LoadNode->getNodeId() != -1) { - LoadSU = &SUnits[LoadNode->getNodeId()]; - isNewLoad = false; - } else { - LoadSU = CreateNewSUnit(LoadNode); - LoadNode->setNodeId(LoadSU->NodeNum); - - InitNumRegDefsLeft(LoadSU); - computeLatency(LoadSU); - } - - SUnit *NewSU = CreateNewSUnit(N); - assert(N->getNodeId() == -1 && "Node already inserted!"); - N->setNodeId(NewSU->NodeNum); - - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { - if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { - NewSU->isTwoAddress = true; - break; - } - } - if (MCID.isCommutable()) - NewSU->isCommutable = true; - - InitNumRegDefsLeft(NewSU); - computeLatency(NewSU); - - // Record all the edges to and from the old SU, by category. - SmallVector<SDep, 4> ChainPreds; - SmallVector<SDep, 4> ChainSuccs; - SmallVector<SDep, 4> LoadPreds; - SmallVector<SDep, 4> NodePreds; - SmallVector<SDep, 4> NodeSuccs; - for (SDep &Pred : SU->Preds) { - if (Pred.isCtrl()) - ChainPreds.push_back(Pred); - else if (isOperandOf(Pred.getSUnit(), LoadNode)) - LoadPreds.push_back(Pred); - else - NodePreds.push_back(Pred); - } - for (SDep &Succ : SU->Succs) { - if (Succ.isCtrl()) - ChainSuccs.push_back(Succ); - else - NodeSuccs.push_back(Succ); - } - - // Now assign edges to the newly-created nodes. - for (const SDep &Pred : ChainPreds) { - RemovePred(SU, Pred); - if (isNewLoad) - AddPred(LoadSU, Pred); - } - for (const SDep &Pred : LoadPreds) { - RemovePred(SU, Pred); - if (isNewLoad) - AddPred(LoadSU, Pred); - } - for (const SDep &Pred : NodePreds) { - RemovePred(SU, Pred); - AddPred(NewSU, Pred); - } - for (SDep D : NodeSuccs) { - SUnit *SuccDep = D.getSUnit(); - D.setSUnit(SU); - RemovePred(SuccDep, D); - D.setSUnit(NewSU); - AddPred(SuccDep, D); - // Balance register pressure. - if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled - && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) - --NewSU->NumRegDefsLeft; - } - for (SDep D : ChainSuccs) { - SUnit *SuccDep = D.getSUnit(); - D.setSUnit(SU); - RemovePred(SuccDep, D); - if (isNewLoad) { - D.setSUnit(LoadSU); - AddPred(SuccDep, D); - } - } - - // Add a data dependency to reflect that NewSU reads the value defined - // by LoadSU. - SDep D(LoadSU, SDep::Data, 0); - D.setLatency(LoadSU->Latency); - AddPred(NewSU, D); - - if (isNewLoad) - AvailableQueue->addNode(LoadSU); - AvailableQueue->addNode(NewSU); - - ++NumUnfolds; - - if (NewSU->NumSuccsLeft == 0) { - NewSU->isAvailable = true; - return NewSU; - } - SU = NewSU; + SU = UnfoldSU; + N = SU->getNode(); + // If this can be scheduled don't bother duplicating and just return + if (SU->NumSuccsLeft == 0) + return SU; } DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c37d7080f2c5a..0dbd9e846aa60 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -214,6 +214,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCE: return "setcce"; + case ISD::SETCCCARRY: return "setcccarry"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 0def5ae6d0d0d..900c0318b179a 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -842,9 +842,10 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; - MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize - = MaxStoresPerMemmoveOptSize = 4; + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = + MaxLoadsPerMemcmp = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = + MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; HasMultipleConditionRegisters = false; @@ -926,6 +927,7 @@ void TargetLoweringBase::initActions() { // ADDCARRY operations default to expand setOperationAction(ISD::ADDCARRY, VT, Expand); setOperationAction(ISD::SUBCARRY, VT, Expand); + setOperationAction(ISD::SETCCCARRY, VT, Expand); // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 83348058eca9b..72d5e995ac225 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -261,9 +261,9 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. -TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) +TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false), - AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), + AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true), RequireCodeGenSCCOrder(false) { @@ -282,9 +282,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&PostRAMachineLICMID, &MachineLICMID); if (StringRef(PrintMachineInstrs.getValue()).equals("")) - TM->Options.PrintMachineCode = true; + TM.Options.PrintMachineCode = true; - if (TM->Options.EnableIPRA) + if (TM.Options.EnableIPRA) setRequiresCodeGenSCCOrder(); } @@ -310,7 +310,7 @@ void TargetPassConfig::insertPass(AnalysisID TargetPassID, /// /// Targets may override this to extend TargetPassConfig. TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { - return new TargetPassConfig(this, PM); + return new TargetPassConfig(*this, PM); } TargetPassConfig::TargetPassConfig() @@ -430,7 +430,12 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) { } void TargetPassConfig::addVerifyPass(const std::string &Banner) { - if (VerifyMachineCode) + bool Verify = VerifyMachineCode; +#ifdef EXPENSIVE_CHECKS + if (VerifyMachineCode == cl::BOU_UNSET) + Verify = TM->isMachineVerifierClean(); +#endif + if (Verify) PM->add(createMachineVerifierPass(Banner)); } |