diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-06-01 20:58:36 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-06-01 20:58:36 +0000 |
commit | f382538d471e38a9b98f016c4caebd24c8d60b62 (patch) | |
tree | d30f3d58b1044b5355d50c17a6a96c6a0b35703a /lib/Transforms | |
parent | ee2f195dd3e40f49698ca4dc2666ec09c770e80d (diff) |
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Coroutines/CoroCleanup.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/Coroutines/CoroEarly.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/Coroutines/CoroElide.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/Coroutines/CoroSplit.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/IPO/PartialInlining.cpp | 23 | ||||
-rw-r--r-- | lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 20 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 64 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 140 | ||||
-rw-r--r-- | lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 164 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 16 | ||||
-rw-r--r-- | lib/Transforms/Scalar/NewGVN.cpp | 38 | ||||
-rw-r--r-- | lib/Transforms/Utils/CodeExtractor.cpp | 83 | ||||
-rw-r--r-- | lib/Transforms/Utils/PredicateInfo.cpp | 35 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 14 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 29 |
17 files changed, 309 insertions, 352 deletions
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp index 5cf2a8c25d837..359876627fce1 100644 --- a/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -101,7 +101,9 @@ namespace { struct CoroCleanup : FunctionPass { static char ID; // Pass identification, replacement for typeid - CoroCleanup() : FunctionPass(ID) {} + CoroCleanup() : FunctionPass(ID) { + initializeCoroCleanupPass(*PassRegistry::getPassRegistry()); + } std::unique_ptr<Lowerer> L; diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp index b529891861651..ba05896af150c 100644 --- a/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/lib/Transforms/Coroutines/CoroEarly.cpp @@ -183,7 +183,9 @@ namespace { struct CoroEarly : public FunctionPass { static char ID; // Pass identification, replacement for typeid. - CoroEarly() : FunctionPass(ID) {} + CoroEarly() : FunctionPass(ID) { + initializeCoroEarlyPass(*PassRegistry::getPassRegistry()); + } std::unique_ptr<Lowerer> L; diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp index acb22449142b9..42fd6d7461459 100644 --- a/lib/Transforms/Coroutines/CoroElide.cpp +++ b/lib/Transforms/Coroutines/CoroElide.cpp @@ -258,7 +258,9 @@ static bool replaceDevirtTrigger(Function &F) { namespace { struct CoroElide : FunctionPass { static char ID; - CoroElide() : FunctionPass(ID) {} + CoroElide() : FunctionPass(ID) { + initializeCoroElidePass(*PassRegistry::getPassRegistry()); + } std::unique_ptr<Lowerer> L; diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index cd549e4be2826..613b4a7f03e98 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -681,7 +681,9 @@ namespace { struct CoroSplit : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - CoroSplit() : CallGraphSCCPass(ID) {} + CoroSplit() : CallGraphSCCPass(ID) { + initializeCoroSplitPass(*PassRegistry::getPassRegistry()); + } bool Run = false; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 4c417f1c55ebe..bc0967448cddf 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -652,12 +652,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size(); + auto IsTrivialPhi = [](PHINode *PN) -> Value * { + Value *CommonValue = PN->getIncomingValue(0); + if (all_of(PN->incoming_values(), + [&](Value *V) { return V == CommonValue; })) + return CommonValue; + return nullptr; + }; + if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { NewReturnBlock = NewReturnBlock->splitBasicBlock( NewReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = PreReturn->begin(); Instruction *Ins = &NewReturnBlock->front(); + SmallVector<Instruction *, 4> DeadPhis; while (I != PreReturn->end()) { PHINode *OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) @@ -674,8 +683,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); OldPhi->removeIncomingValue(NewE); } + + // After incoming values splitting, the old phi may become trivial. + // Keeping the trivial phi can introduce definition inside the outline + // region which is live-out, causing necessary overhead (load, store + // arg passing etc). + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { + OldPhi->replaceAllUsesWith(OldPhiVal); + DeadPhis.push_back(OldPhi); + } + ++I; } + + for (auto *DP : DeadPhis) + DP->eraseFromParent(); + for (auto E : OI->ReturnBlockPreds) { BasicBlock *NewE = cast<BasicBlock>(VMap[E]); NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 659cb9df00a2c..9dede4cedd1de 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -6,14 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This pass prepares a module containing type metadata for ThinLTO by splitting -// it into regular and thin LTO parts if possible, and writing both parts to -// a multi-module bitcode file. Modules that do not contain type metadata are -// written unmodified as a single module. -// -//===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -436,3 +430,15 @@ ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, raw_ostream *ThinLinkOS) { return new WriteThinLTOBitcode(Str, ThinLinkOS); } + +PreservedAnalyses +llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + writeThinLTOBitcode(OS, ThinLinkOS, + [&FAM](Function &F) -> AAResults & { + return FAM.getResult<AAManager>(F); + }, + M, &AM.getResult<ModuleSummaryIndexAnalysis>(M)); + return PreservedAnalyses::all(); +} diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 92a38f26dde7a..b44499ec4be9f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3838,24 +3838,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. - SmallVector<unsigned, 4> Indices; + SmallVector<unsigned, 4> ArgNos; unsigned ArgNo = 0; for (Value *V : CS.args()) { if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && isKnownNonNullAt(V, CS.getInstruction(), &DT)) - Indices.push_back(ArgNo + AttributeList::FirstArgIndex); + ArgNos.push_back(ArgNo); ArgNo++; } assert(ArgNo == CS.arg_size() && "sanity check"); - if (!Indices.empty()) { + if (!ArgNos.empty()) { AttributeList AS = CS.getAttributes(); LLVMContext &Ctx = CS.getInstruction()->getContext(); - AS = AS.addAttribute(Ctx, Indices, - Attribute::get(Ctx, Attribute::NonNull)); + AS = AS.addParamAttribute(Ctx, ArgNos, + Attribute::get(Ctx, Attribute::NonNull)); CS.setAttributes(AS); Changed = true; } diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 1e30dbf6b55a8..b2d95271479c3 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -182,6 +182,14 @@ static cl::opt<bool> cl::desc("Use this option to turn on/off " "memory intrinsic size profiling.")); +// Emit branch probability as optimization remarks. +static cl::opt<bool> + EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, + cl::desc("When this option is on, the annotated " + "branch probability will be emitted as " + " optimization remarks: -Rpass-analysis=" + "pgo-instr-use")); + // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts extern cl::opt<bool> PGOViewCounts; @@ -192,6 +200,39 @@ extern cl::opt<std::string> ViewBlockFreqFuncName; namespace { +// Return a string describing the branch condition that can be +// used in static branch probability heuristics: +std::string getBranchCondString(Instruction *TI) { + BranchInst *BI = dyn_cast<BranchInst>(TI); + if (!BI || !BI->isConditional()) + return std::string(); + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return std::string(); + + std::string result; + raw_string_ostream OS(result); + OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; + CI->getOperand(0)->getType()->print(OS, true); + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = dyn_cast<ConstantInt>(RHS); + if (CV) { + if (CV->isZero()) + OS << "_Zero"; + else if (CV->isOne()) + OS << "_One"; + else if (CV->isAllOnesValue()) + OS << "_MinusOne"; + else + OS << "_Const"; + } + OS.flush(); + return result; +} + /// The select instruction visitor plays three roles specified /// by the mode. In \c VM_counting mode, it simply counts the number of /// select instructions. In \c VM_instrument mode, it inserts code to count @@ -1424,6 +1465,29 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts, for (const auto &W : Weights) { dbgs() << W << " "; } dbgs() << "\n";); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + if (EmitBranchProbability) { + std::string BrCondStr = getBranchCondString(TI); + if (BrCondStr.empty()) + return; + + unsigned WSum = + std::accumulate(Weights.begin(), Weights.end(), 0, + [](unsigned w1, unsigned w2) { return w1 + w2; }); + uint64_t TotalCount = + std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0, + [](uint64_t c1, uint64_t c2) { return c1 + c2; }); + BranchProbability BP(Weights[0], WSum); + std::string BranchProbStr; + raw_string_ostream OS(BranchProbStr); + OS << BP; + OS << " (total count : " << TotalCount << ")"; + OS.flush(); + Function *F = TI->getParent()->getParent(); + emitOptimizationRemarkAnalysis( + F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(), + Twine(BrCondStr) + + " is true with probability : " + Twine(BranchProbStr)); + } } template <> struct GraphTraits<PGOUseFunc *> { diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 300085eccb0c2..325b64cd8b436 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -7,24 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Coverage instrumentation that works with AddressSanitizer -// and potentially with other Sanitizers. -// -// We create a Guard variable with the same linkage -// as the function and inject this code into the entry block (SCK_Function) -// or all blocks (SCK_BB): -// if (Guard < 0) { -// __sanitizer_cov(&Guard); -// } -// The accesses to Guard are atomic. The rest of the logic is -// in __sanitizer_cov (it's fine to call it more than once). -// -// With SCK_Edge we also split critical edges this effectively -// instrumenting all edges. -// -// This coverage implementation provides very limited data: -// it only tells if a given function (block) was ever executed. No counters. -// But for many use cases this is what we need and the added slowdown small. +// Coverage instrumentation done on LLVM IR level, works with Sanitizers. // //===----------------------------------------------------------------------===// @@ -56,9 +39,6 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; -static const char *const SanCovName = "__sanitizer_cov"; -static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; static const char *const SanCovTracePCIndirName = "__sanitizer_cov_trace_pc_indir"; static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; @@ -84,12 +64,6 @@ static cl::opt<int> ClCoverageLevel( "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); -static cl::opt<unsigned> ClCoverageBlockThreshold( - "sanitizer-coverage-block-threshold", - cl::desc("Use a callback with a guard check inside it if there are" - " more than this number of blocks."), - cl::Hidden, cl::init(0)); - static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", cl::desc("Experimental pc tracing"), cl::Hidden, cl::init(false)); @@ -151,6 +125,8 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { Options.TraceGep |= ClGEPTracing; Options.TracePC |= ClExperimentalTracePC; Options.TracePCGuard |= ClTracePCGuard; + if (!Options.TracePCGuard && !Options.TracePC) + Options.TracePCGuard = true; // TracePCGuard is default. Options.NoPrune |= !ClPruneBlocks; return Options; } @@ -184,18 +160,10 @@ private: ArrayRef<Instruction *> SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); void CreateFunctionGuardArray(size_t NumGuards, Function &F); - void SetNoSanitizeMetadata(Instruction *I); - void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, - bool UseCalls); - unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + - SanCovWithCheckFunction->getNumUses(); - } + void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); StringRef getSanCovTracePCGuardSection() const; StringRef getSanCovTracePCGuardSectionStart() const; StringRef getSanCovTracePCGuardSectionEnd() const; - Function *SanCovFunction; - Function *SanCovWithCheckFunction; Function *SanCovTracePCIndir; Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; @@ -209,7 +177,6 @@ private: LLVMContext *C; const DataLayout *DL; - GlobalVariable *GuardArray; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. bool HasSancovGuardsSection; @@ -230,16 +197,11 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { IntptrPtrTy = PointerType::getUnqual(IntptrTy); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); - Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); - SanCovFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy)); - SanCovWithCheckFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy)); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); SanCovTraceCmpFunction[0] = @@ -278,41 +240,10 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTracePCGuardName, VoidTy, Int32PtrTy)); - // At this point we create a dummy array of guards because we don't - // know how many elements we will need. - Type *Int32Ty = IRB.getInt32Ty(); - - if (!Options.TracePCGuard) - GuardArray = - new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); - for (auto &F : M) runOnFunction(F); - auto N = NumberOfInstrumentedBlocks(); - - GlobalVariable *RealGuardArray = nullptr; - if (!Options.TracePCGuard) { - // Now we know how many elements we need. Create an array of guards - // with one extra element at the beginning for the size. - Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1); - RealGuardArray = new GlobalVariable( - M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); - - // Replace the dummy array with the real one. - GuardArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); - GuardArray->eraseFromParent(); - } - // Create variable for module (compilation unit) name - Constant *ModNameStrConst = - ConstantDataArray::getString(M.getContext(), M.getName(), true); - GlobalVariable *ModuleName = new GlobalVariable( - M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage, - ModNameStrConst, "__sancov_gen_modname"); if (Options.TracePCGuard) { if (HasSancovGuardsSection) { Function *CtorFunc; @@ -339,18 +270,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); } } - } else if (!Options.TracePC) { - Function *CtorFunc; - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovModuleInitName, - {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, - {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); - - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); } - return true; } @@ -494,13 +414,12 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, return false; case SanitizerCoverageOptions::SCK_Function: CreateFunctionGuardArray(1, F); - InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false); + InjectCoverageAtBlock(F, F.getEntryBlock(), 0); return true; default: { - bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size(); CreateFunctionGuardArray(AllBlocks.size(), F); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) - InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls); + InjectCoverageAtBlock(F, *AllBlocks[i], i); return true; } } @@ -517,8 +436,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - if (!Options.TracePC && !Options.TracePCGuard) - return; + assert(Options.TracePC || Options.TracePCGuard); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); @@ -625,13 +543,8 @@ void SanitizerCoverageModule::InjectTraceForCmp( } } -void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(I->getModule()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); -} - void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, - size_t Idx, bool UseCalls) { + size_t Idx) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; @@ -651,47 +564,14 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else if (Options.TracePCGuard) { + } else { + assert(Options.TracePCGuard); auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), Int32PtrTy); - if (!UseCalls) { - auto GuardLoad = IRB.CreateLoad(GuardPtr); - GuardLoad->setAtomic(AtomicOrdering::Monotonic); - GuardLoad->setAlignment(8); - SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan. - auto Cmp = IRB.CreateICmpNE( - GuardLoad, Constant::getNullValue(GuardLoad->getType())); - auto Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - } IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else { - Value *GuardP = IRB.CreateAdd( - IRB.CreatePointerCast(GuardArray, IntptrTy), - ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); - GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (UseCalls) { - IRB.CreateCall(SanCovWithCheckFunction, GuardP); - } else { - LoadInst *Load = IRB.CreateLoad(GuardP); - Load->setAtomic(AtomicOrdering::Monotonic); - Load->setAlignment(4); - SetNoSanitizeMetadata(Load); - Value *Cmp = - IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); - Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. - IRB.CreateCall(SanCovFunction, GuardP); - IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } } } diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ee493a8ec7e18..7b625b9b136ec 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -305,7 +305,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { /// Infer nonnull attributes for the arguments at the specified callsite. static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { - SmallVector<unsigned, 4> Indices; + SmallVector<unsigned, 4> ArgNos; unsigned ArgNo = 0; for (Value *V : CS.args()) { @@ -318,18 +318,19 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) - Indices.push_back(ArgNo + AttributeList::FirstArgIndex); + ArgNos.push_back(ArgNo); ArgNo++; } assert(ArgNo == CS.arg_size() && "sanity check"); - if (Indices.empty()) + if (ArgNos.empty()) return false; AttributeList AS = CS.getAttributes(); LLVMContext &Ctx = CS.getInstruction()->getContext(); - AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull)); + AS = AS.addParamAttribute(Ctx, ArgNos, + Attribute::get(Ctx, Attribute::NonNull)); CS.setAttributes(AS); return true; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 0d6e0538261d0..0490d93f64553 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; - bool commutative; SmallVector<uint32_t, 4> varargs; - Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} + Expression(uint32_t o = ~2U) : opcode(o) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); - e.commutative = true; } if (CmpInst *C = dyn_cast<CmpInst>(I)) { @@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; - e.commutative = true; } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; - e.commutative = true; return e; } @@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); - if (PHINode *PN = dyn_cast<PHINode>(V)) - NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t &e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - auto ValNum = assignExpNewValueNum(exp); - if (ValNum.second) { - valueNumbering[C] = ValNum.first; - return ValNum.first; + uint32_t &e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; } if (!MD) { - uint32_t e = assignExpNewValueNum(exp).first; + e = nextValueNumber++; valueNumbering[C] = e; return e; } @@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); break; - case Instruction::PHI: - valueNumbering[V] = nextValueNumber; - NumberingPhi[nextValueNumber] = cast<PHINode>(V); - return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { +uint32_t GVN::ValueTable::lookup(Value *V) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); - if (Verify) { - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; - } - return (VI != valueNumbering.end()) ? VI->second : 0; + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; } /// Returns the value number of the given comparison, @@ -559,29 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - return assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + return e; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); - NumberingPhi.clear(); - PhiTranslateTable.clear(); - BlockRPONumber.clear(); nextValueNumber = 1; - Expressions.clear(); - ExprIdx.clear(); - nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { - uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); - // If V is PHINode, V <--> value number is an one-to-one mapping. - if (isa<PHINode>(V)) - NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1469,104 +1451,6 @@ bool GVN::processLoad(LoadInst *L) { return false; } -/// Return a pair the first field showing the value number of \p Exp and the -/// second field showing whether it is a value number newly created. -std::pair<uint32_t, bool> -GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { - uint32_t &e = expressionNumbering[Exp]; - bool CreateNewValNum = !e; - if (CreateNewValNum) { - Expressions.push_back(Exp); - if (ExprIdx.size() < nextValueNumber + 1) - ExprIdx.resize(nextValueNumber * 2); - e = nextValueNumber; - ExprIdx[nextValueNumber++] = nextExprNumber++; - } - return {e, CreateNewValNum}; -} - -void GVN::ValueTable::assignBlockRPONumber(Function &F) { - uint32_t NextBlockNumber = 1; - ReversePostOrderTraversal<Function *> RPOT(&F); - for (BasicBlock *BB : RPOT) - BlockRPONumber[BB] = NextBlockNumber++; -} - -/// Return whether all the values related with the same \p num are -/// defined in \p BB. -bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, - GVN &Gvn) { - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals && Vals->BB == BB) - Vals = Vals->Next; - return !Vals; -} - -/// Wrap phiTranslateImpl to provide caching functionality. -uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, - const BasicBlock *PhiBlock, uint32_t Num, - GVN &Gvn) { - auto FindRes = PhiTranslateTable.find({Num, Pred}); - if (FindRes != PhiTranslateTable.end()) - return FindRes->second; - uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); - PhiTranslateTable.insert({{Num, Pred}, NewNum}); - return NewNum; -} - -/// Translate value number \p Num using phis, so that it has the values of -/// the phis in BB. -uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, - const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn) { - if (PHINode *PN = NumberingPhi[Num]) { - if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock]) - return Num; - for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) - if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) - return TransVal; - } - return Num; - } - - // If there is any value related with Num is defined in a BB other than - // PhiBlock, it cannot depend on a phi in PhiBlock without going through - // a backedge. We can do an early exit in that case to save compile time. - if (!areAllValsInBB(Num, PhiBlock, Gvn)) - return Num; - - if (ExprIdx[Num] == 0 || Num >= ExprIdx.size()) - return Num; - Expression Exp = Expressions[ExprIdx[Num]]; - - for (unsigned i = 0; i < Exp.varargs.size(); i++) { - // For InsertValue and ExtractValue, some varargs are index numbers - // instead of value numbers. Those index numbers should not be - // translated. - if ((i > 1 && Exp.opcode == Instruction::InsertValue) || - (i > 0 && Exp.opcode == Instruction::ExtractValue)) - continue; - Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); - } - - if (Exp.commutative) { - assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); - if (Exp.varargs[0] > Exp.varargs[1]) { - std::swap(Exp.varargs[0], Exp.varargs[1]); - uint32_t Opcode = Exp.opcode >> 8; - if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) - Exp.opcode = (Opcode << 8) | - CmpInst::getSwappedPredicate( - static_cast<CmpInst::Predicate>(Exp.opcode & 255)); - } - } - - if (uint32_t NewNum = expressionNumbering[Exp]) - return NewNum; - return Num; -} - // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1972,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); - VN.assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2062,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - uint32_t TValNo = - VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this); - if (Value *V = findLeader(Pred, TValNo)) { + if (Value *V = findLeader(Pred, VN.lookup(Op))) { Instr->setOperand(i, V); } else { success = false; @@ -2081,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - - unsigned Num = VN.lookupOrAdd(Instr); - VN.add(Instr, Num); + VN.add(Instr, ValNo); // Update the availability map to include the new instruction. - addToLeaderTable(Num, Instr, Pred); + addToLeaderTable(ValNo, Instr, Pred); return true; } @@ -2135,8 +2014,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { break; } - uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); - Value *predV = findLeader(P, TValNo); + Value *predV = findLeader(P, ValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index a143b9a3c645f..930696b036c00 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -98,11 +98,20 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { CallInst *CI; ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition()); + CmpInst::Predicate Predicate; + uint64_t ValueComparedTo = 0; if (!CmpI) { CI = dyn_cast<CallInst>(BSI.getCondition()); + Predicate = CmpInst::ICMP_NE; + ValueComparedTo = 0; } else { - if (CmpI->getPredicate() != CmpInst::ICMP_NE) + Predicate = CmpI->getPredicate(); + if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ) return false; + ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1)); + if (!CmpConstOperand) + return false; + ValueComparedTo = CmpConstOperand->getZExtValue(); CI = dyn_cast<CallInst>(CmpI->getOperand(0)); } @@ -121,9 +130,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { MDBuilder MDB(CI->getContext()); MDNode *Node; - // If expect value is equal to 1 it means that we are more likely to take - // branch 0, in other case more likely is branch 1. - if (ExpectedValue->isOne()) + if ((ExpectedValue->getZExtValue() == ValueComparedTo) == + (Predicate == CmpInst::ICMP_EQ)) Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); else Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 5e9f40019ce8c..27809f5b6f661 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -613,7 +613,7 @@ private: return CClass; } void initializeCongruenceClasses(Function &F); - const Expression *makePossiblePhiOfOps(Instruction *, bool, + const Expression *makePossiblePhiOfOps(Instruction *, SmallPtrSetImpl<Value *> &); void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); @@ -1937,7 +1937,8 @@ void NewGVN::touchAndErase(Map &M, const KeyType &Key) { } void NewGVN::addAdditionalUsers(Value *To, Value *User) const { - AdditionalUsers[To].insert(User); + if (isa<Instruction>(To)) + AdditionalUsers[To].insert(User); } void NewGVN::markUsersTouched(Value *V) { @@ -2423,7 +2424,7 @@ static bool okayForPHIOfOps(const Instruction *I) { // When we see an instruction that is an op of phis, generate the equivalent phi // of ops form. const Expression * -NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge, +NewGVN::makePossiblePhiOfOps(Instruction *I, SmallPtrSetImpl<Value *> &Visited) { if (!okayForPHIOfOps(I)) return nullptr; @@ -2438,24 +2439,6 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge, return nullptr; unsigned IDFSNum = InstrToDFSNum(I); - // Pretty much all of the instructions we can convert to phi of ops over a - // backedge that are adds, are really induction variables, and those are - // pretty much pointless to convert. This is very coarse-grained for a - // test, so if we do find some value, we can change it later. - // But otherwise, what can happen is we convert the induction variable from - // - // i = phi (0, tmp) - // tmp = i + 1 - // - // to - // i = phi (0, tmpphi) - // tmpphi = phi(1, tmpphi+1) - // - // Which we don't want to happen. We could just avoid this for all non-cycle - // free phis, and we made go that route. - if (HasBackedge && I->getOpcode() == Instruction::Add) - return nullptr; - SmallPtrSet<const Value *, 8> ProcessedPHIs; // TODO: We don't do phi translation on memory accesses because it's // complicated. For a load, we'd need to be able to simulate a new memoryuse, @@ -2470,6 +2453,16 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge, // Convert op of phis to phi of ops for (auto &Op : I->operands()) { + // TODO: We can't handle expressions that must be recursively translated + // IE + // a = phi (b, c) + // f = use a + // g = f + phi of something + // To properly make a phi of ops for g, we'd have to properly translate and + // use the instruction for f. We should add this by splitting out the + // instruction creation we do below. + if (isa<Instruction>(Op) && PHINodeUses.count(cast<Instruction>(Op))) + return nullptr; if (!isa<PHINode>(Op)) continue; auto *OpPHI = cast<PHINode>(Op); @@ -2782,8 +2775,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) { // Make a phi of ops if necessary if (Symbolized && !isa<ConstantExpression>(Symbolized) && !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) { - // FIXME: Backedge argument - auto *PHIE = makePossiblePhiOfOps(I, false, Visited); + auto *PHIE = makePossiblePhiOfOps(I, Visited); if (PHIE) Symbolized = PHIE; } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index ed72099ec3ed6..24d28a6c28311 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" @@ -141,16 +142,77 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { return false; } -void CodeExtractor::findInputsOutputs(ValueSet &Inputs, - ValueSet &Outputs) const { +void CodeExtractor::findAllocas(ValueSet &SinkCands) const { + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + auto *AI = dyn_cast<AllocaInst>(&II); + if (!AI) + continue; + + // Returns true if matching life time markers are found within + // the outlined region. + auto GetLifeTimeMarkers = [&](Instruction *Addr) { + Instruction *LifeStart = nullptr, *LifeEnd = nullptr; + for (User *U : Addr->users()) { + if (!definedInRegion(Blocks, U)) + return false; + + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) + LifeStart = IntrInst; + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + LifeEnd = IntrInst; + } + } + return LifeStart && LifeEnd; + }; + + if (GetLifeTimeMarkers(AI)) { + SinkCands.insert(AI); + continue; + } + + // Follow the bitcast: + Instruction *MarkerAddr = nullptr; + for (User *U : AI->users()) { + if (U->stripPointerCasts() == AI) { + Instruction *Bitcast = cast<Instruction>(U); + if (GetLifeTimeMarkers(Bitcast)) { + MarkerAddr = Bitcast; + continue; + } + } + if (!definedInRegion(Blocks, U)) { + MarkerAddr = nullptr; + break; + } + } + if (MarkerAddr) { + if (!definedInRegion(Blocks, MarkerAddr)) + SinkCands.insert(MarkerAddr); + SinkCands.insert(AI); + } + } + } +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &SinkCands) const { + for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) - if (definedInCaller(Blocks, *OI)) - Inputs.insert(*OI); + ++OI) { + Value *V = *OI; + if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + Inputs.insert(V); + } for (User *U : II.users()) if (!definedInRegion(Blocks, U)) { @@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs; + ValueSet inputs, outputs, SinkingCands; // Assumption: this is a single-entry code region, and the header is the first // block in the region. @@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegion() { "newFuncRoot"); newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + findAllocas(SinkingCands); + // Find inputs to, outputs from the code region. - findInputsOutputs(inputs, outputs); + findInputsOutputs(inputs, outputs, SinkingCands); + + // Now sink all instructions which only have non-phi uses inside the region + for (auto *II : SinkingCands) + cast<Instruction>(II)->moveBefore(*newFuncRoot, + newFuncRoot->getFirstInsertionPt()); // Calculate the exit blocks for the extracted region and the total exit // weights for each of those blocks. diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index 8877aeafecdec..9e71cba4f1b7a 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -541,7 +541,40 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpsToRename) { +void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { + // Sort OpsToRename since we are going to iterate it. + SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end()); + std::sort(OpsToRename.begin(), OpsToRename.end(), [&](const Value *A, + const Value *B) { + auto *ArgA = dyn_cast_or_null<Argument>(A); + auto *ArgB = dyn_cast_or_null<Argument>(B); + + // If A and B are args, order them based on their arg no. + if (ArgA && !ArgB) + return true; + if (ArgB && !ArgA) + return false; + if (ArgA && ArgB) + return ArgA->getArgNo() < ArgB->getArgNo(); + + // Else, A are B are instructions. + // If they belong to different BBs, order them by the dominance of BBs. + auto *AInst = cast<Instruction>(A); + auto *BInst = cast<Instruction>(B); + if (AInst->getParent() != BInst->getParent()) + return DT.dominates(AInst->getParent(), BInst->getParent()); + + // Else, A and B belong to the same BB. + // Order A and B by their dominance. + auto *BB = AInst->getParent(); + auto LookupResult = OBBMap.find(BB); + if (LookupResult != OBBMap.end()) + return LookupResult->second->dominates(AInst, BInst); + + auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)}); + return Result.first->second->dominates(AInst, BInst); + }); + ValueDFS_Compare Compare(OBBMap); // Compute liveness, and rename in O(uses) per Op. for (auto *Op : OpsToRename) { diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 49effda5d833c..cc6c47e8f978d 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -85,20 +85,6 @@ static bool isCallingConvCCompatible(CallInst *CI) { return false; } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool isOnlyUsedInZeroEqualityComparison(Value *V) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (IC->isEquality()) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - /// Return true if it is only used in equality comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { for (User *U : V->users()) { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 2b83b8426d147..8b9a64c220ccd 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7170,10 +7170,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { Type *VectorTy; unsigned C = getInstructionCost(I, VF, VectorTy); - // Note: Even if all instructions are scalarized, return true if any memory - // accesses appear in the loop to get benefits from address folding etc. bool TypeNotScalarized = - VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF; + VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF; return VectorizationCostTy(C, TypeNotScalarized); } @@ -7312,7 +7310,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); - VectorTy = ToVectorTy(RetTy, VF); + VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. @@ -7445,9 +7443,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } - SmallVector<const Value *, 4> Operands(I->operand_values()); - return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, - Op2VK, Op1VP, Op2VP, Operands); + SmallVector<const Value *, 4> Operands(I->operand_values()); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, + Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); @@ -7470,7 +7469,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } case Instruction::Store: case Instruction::Load: { - VectorTy = ToVectorTy(getMemInstValueType(I), VF); + unsigned Width = VF; + if (Width > 1) { + InstWidening Decision = getWideningDecision(I, Width); + assert(Decision != CM_Unknown && + "CM decision should be taken at this point"); + if (Decision == CM_Scalarize) + Width = 1; + } + VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } case Instruction::ZExt: @@ -7495,7 +7502,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } Type *SrcScalarTy = I->getOperand(0)->getType(); - Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF); + Type *SrcVecTy = + VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy; if (canTruncateToMinimalBitwidth(I, VF)) { // This cast is going to be shrunk. This may remove the cast or it might // turn it into slightly different cast. For example, if MinBW == 16, @@ -7515,7 +7523,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } } - return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); } case Instruction::Call: { bool NeedToScalarize; |