diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCCTRLoops.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCCTRLoops.cpp | 585 |
1 files changed, 3 insertions, 582 deletions
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 6b9e2383e36f..2b8d9b87724f 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -1,9 +1,8 @@ //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -72,70 +71,7 @@ using namespace llvm; static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif -// The latency of mtctr is only justified if there are more than 4 -// comparisons that will be removed as a result. -static cl::opt<unsigned> -SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, - cl::desc("Loops with a constant trip count smaller than " - "this value will not use the count register.")); - -STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); - -namespace llvm { - void initializePPCCTRLoopsPass(PassRegistry&); -#ifndef NDEBUG - void initializePPCCTRLoopsVerifyPass(PassRegistry&); -#endif -} - namespace { - struct PPCCTRLoops : public FunctionPass { - -#ifndef NDEBUG - static int Counter; -#endif - - public: - static char ID; - - PPCCTRLoops() : FunctionPass(ID) { - initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - - private: - bool mightUseCTR(BasicBlock *BB); - bool convertToCTRLoop(Loop *L); - - private: - const PPCTargetMachine *TM; - const PPCSubtarget *STI; - const PPCTargetLowering *TLI; - const DataLayout *DL; - const TargetLibraryInfo *LibInfo; - const TargetTransformInfo *TTI; - LoopInfo *LI; - ScalarEvolution *SE; - DominatorTree *DT; - bool PreserveLCSSA; - TargetSchedModel SchedModel; - }; - - char PPCCTRLoops::ID = 0; -#ifndef NDEBUG - int PPCCTRLoops::Counter = 0; -#endif #ifndef NDEBUG struct PPCCTRLoopsVerify : public MachineFunctionPass { @@ -161,16 +97,6 @@ namespace { #endif // NDEBUG } // end anonymous namespace -INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", - false, false) - -FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } - #ifndef NDEBUG INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", "PowerPC CTR Loops Verify", false, false) @@ -183,511 +109,6 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { } #endif // NDEBUG -bool PPCCTRLoops::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); - if (!TPC) - return false; - - TM = &TPC->getTM<PPCTargetMachine>(); - STI = TM->getSubtargetImpl(F); - TLI = STI->getTargetLowering(); - - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DL = &F.getParent()->getDataLayout(); - auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - LibInfo = TLIP ? &TLIP->getTLI() : nullptr; - PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - - bool MadeChange = false; - - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - if (!L->getParentLoop()) - MadeChange |= convertToCTRLoop(L); - } - - return MadeChange; -} - -static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); - - return false; -} - -// Determining the address of a TLS variable results in a function call in -// certain TLS models. -static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { - const auto *GV = dyn_cast<GlobalValue>(MemAddr); - if (!GV) { - // Recurse to check for constants that refer to TLS global variables. - if (const auto *CV = dyn_cast<Constant>(MemAddr)) - for (const auto &CO : CV->operands()) - if (memAddrUsesCTR(TM, CO)) - return true; - - return false; - } - - if (!GV->isThreadLocal()) - return false; - TLSModel::Model Model = TM.getTLSModel(GV); - return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; -} - -// Loop through the inline asm constraints and look for something that clobbers -// ctr. -static bool asmClobbersCTR(InlineAsm *IA) { - InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); - for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { - InlineAsm::ConstraintInfo &C = CIV[i]; - if (C.Type != InlineAsm::isInput) - for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) - if (StringRef(C.Codes[j]).equals_lower("{ctr}")) - return true; - } - return false; -} - -bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); - J != JE; ++J) { - if (CallInst *CI = dyn_cast<CallInst>(J)) { - // Inline ASM is okay, unless it clobbers the ctr register. - if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { - if (asmClobbersCTR(IA)) - return true; - continue; - } - - if (Function *F = CI->getCalledFunction()) { - // Most intrinsics don't become function calls, but some might. - // sin, cos, exp and log are always calls. - unsigned Opcode = 0; - if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { - switch (F->getIntrinsicID()) { - default: continue; - // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr - // we're definitely using CTR. - case Intrinsic::ppc_is_decremented_ctr_nonzero: - case Intrinsic::ppc_mtctr: - return true; - -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - - case Intrinsic::setjmp: - -#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) - // let's return it to _setjmp state -# pragma pop_macro("setjmp") -# undef setjmp_undefined_for_msvc -#endif - - case Intrinsic::longjmp: - - // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp - // because, although it does clobber the counter register, the - // control can't then return to inside the loop unless there is also - // an eh_sjlj_setjmp. - case Intrinsic::eh_sjlj_setjmp: - - case Intrinsic::memcpy: - case Intrinsic::memmove: - case Intrinsic::memset: - case Intrinsic::powi: - case Intrinsic::log: - case Intrinsic::log2: - case Intrinsic::log10: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::pow: - case Intrinsic::sin: - case Intrinsic::cos: - return true; - case Intrinsic::copysign: - if (CI->getArgOperand(0)->getType()->getScalarType()-> - isPPC_FP128Ty()) - return true; - else - continue; // ISD::FCOPYSIGN is never a library call. - case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; - case Intrinsic::floor: Opcode = ISD::FFLOOR; break; - case Intrinsic::ceil: Opcode = ISD::FCEIL; break; - case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; - case Intrinsic::rint: Opcode = ISD::FRINT; break; - case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; - case Intrinsic::round: Opcode = ISD::FROUND; break; - case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; - case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; - case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; - case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; - } - } - - // PowerPC does not use [US]DIVREM or other library calls for - // operations on regular types which are not otherwise library calls - // (i.e. soft float or atomics). If adapting for targets that do, - // additional care is required here. - - LibFunc Func; - if (!F->hasLocalLinkage() && F->hasName() && LibInfo && - LibInfo->getLibFunc(F->getName(), Func) && - LibInfo->hasOptimizedCodeGen(Func)) { - // Non-read-only functions are never treated as intrinsics. - if (!CI->onlyReadsMemory()) - return true; - - // Conversion happens only for FP calls. - if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) - return true; - - switch (Func) { - default: return true; - case LibFunc_copysign: - case LibFunc_copysignf: - continue; // ISD::FCOPYSIGN is never a library call. - case LibFunc_copysignl: - return true; - case LibFunc_fabs: - case LibFunc_fabsf: - case LibFunc_fabsl: - continue; // ISD::FABS is never a library call. - case LibFunc_sqrt: - case LibFunc_sqrtf: - case LibFunc_sqrtl: - Opcode = ISD::FSQRT; break; - case LibFunc_floor: - case LibFunc_floorf: - case LibFunc_floorl: - Opcode = ISD::FFLOOR; break; - case LibFunc_nearbyint: - case LibFunc_nearbyintf: - case LibFunc_nearbyintl: - Opcode = ISD::FNEARBYINT; break; - case LibFunc_ceil: - case LibFunc_ceilf: - case LibFunc_ceill: - Opcode = ISD::FCEIL; break; - case LibFunc_rint: - case LibFunc_rintf: - case LibFunc_rintl: - Opcode = ISD::FRINT; break; - case LibFunc_round: - case LibFunc_roundf: - case LibFunc_roundl: - Opcode = ISD::FROUND; break; - case LibFunc_trunc: - case LibFunc_truncf: - case LibFunc_truncl: - Opcode = ISD::FTRUNC; break; - case LibFunc_fmin: - case LibFunc_fminf: - case LibFunc_fminl: - Opcode = ISD::FMINNUM; break; - case LibFunc_fmax: - case LibFunc_fmaxf: - case LibFunc_fmaxl: - Opcode = ISD::FMAXNUM; break; - } - } - - if (Opcode) { - EVT EVTy = - TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true); - - if (EVTy == MVT::Other) - return true; - - if (TLI->isOperationLegalOrCustom(Opcode, EVTy)) - continue; - else if (EVTy.isVector() && - TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType())) - continue; - - return true; - } - } - - return true; - } else if (isa<BinaryOperator>(J) && - J->getType()->getScalarType()->isPPC_FP128Ty()) { - // Most operations on ppc_f128 values become calls. - return true; - } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || - isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { - CastInst *CI = cast<CastInst>(J); - if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || - CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) - return true; - } else if (isLargeIntegerTy(!TM->isPPC64(), - J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::UDiv || - J->getOpcode() == Instruction::SDiv || - J->getOpcode() == Instruction::URem || - J->getOpcode() == Instruction::SRem)) { - return true; - } else if (!TM->isPPC64() && - isLargeIntegerTy(false, J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::Shl || - J->getOpcode() == Instruction::AShr || - J->getOpcode() == Instruction::LShr)) { - // Only on PPC32, for 128-bit integers (specifically not 64-bit - // integers), these might be runtime calls. - return true; - } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { - // On PowerPC, indirect jumps use the counter register. - return true; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) - return true; - } - - // FREM is always a call. - if (J->getOpcode() == Instruction::FRem) - return true; - - if (STI->useSoftFloat()) { - switch(J->getOpcode()) { - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FCmp: - return true; - } - } - - for (Value *Operand : J->operands()) - if (memAddrUsesCTR(*TM, Operand)) - return true; - } - - return false; -} -bool PPCCTRLoops::convertToCTRLoop(Loop *L) { - bool MadeChange = false; - - // Do not convert small short loops to CTR loop. - unsigned ConstTripCount = SE->getSmallConstantTripCount(L); - if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { - SmallPtrSet<const Value *, 32> EphValues; - auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( - *L->getHeader()->getParent()); - CodeMetrics::collectEphemeralValues(L, &AC, EphValues); - CodeMetrics Metrics; - for (BasicBlock *BB : L->blocks()) - Metrics.analyzeBasicBlock(BB, *TTI, EphValues); - // 6 is an approximate latency for the mtctr instruction. - if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) - return false; - } - - // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - MadeChange |= convertToCTRLoop(*I); - LLVM_DEBUG(dbgs() << "Nested loop converted\n"); - } - - // If a nested loop has been converted, then we can't convert this loop. - if (MadeChange) - return MadeChange; - - // Bail out if the loop has irreducible control flow. - LoopBlocksRPO RPOT(L); - RPOT.perform(LI); - if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) - return false; - -#ifndef NDEBUG - // Stop trying after reaching the limit (if any). - int Limit = CTRLoopLimit; - if (Limit >= 0) { - if (Counter >= CTRLoopLimit) - return false; - Counter++; - } -#endif - - // We don't want to spill/restore the counter register, and so we don't - // want to use the counter register if the loop contains calls. - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) - if (mightUseCTR(*I)) - return MadeChange; - - SmallVector<BasicBlock*, 4> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // If there is an exit edge known to be frequently taken, - // we should not transform this loop. - for (auto &BB : ExitingBlocks) { - Instruction *TI = BB->getTerminator(); - if (!TI) continue; - - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - uint64_t TrueWeight = 0, FalseWeight = 0; - if (!BI->isConditional() || - !BI->extractProfMetadata(TrueWeight, FalseWeight)) - continue; - - // If the exit path is more frequent than the loop path, - // we return here without further analysis for this loop. - bool TrueIsExit = !L->contains(BI->getSuccessor(0)); - if (( TrueIsExit && FalseWeight < TrueWeight) || - (!TrueIsExit && FalseWeight > TrueWeight)) - return MadeChange; - } - } - - BasicBlock *CountedExitBlock = nullptr; - const SCEV *ExitCount = nullptr; - BranchInst *CountedExitBranch = nullptr; - for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), - IE = ExitingBlocks.end(); I != IE; ++I) { - const SCEV *EC = SE->getExitCount(L, *I); - LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block " - << (*I)->getName() << ": " << *EC << "\n"); - if (isa<SCEVCouldNotCompute>(EC)) - continue; - if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { - if (ConstEC->getValue()->isZero()) - continue; - } else if (!SE->isLoopInvariant(EC, L)) - continue; - - if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) - continue; - - // If this exiting block is contained in a nested loop, it is not eligible - // for insertion of the branch-and-decrement since the inner loop would - // end up messing up the value in the CTR. - if (LI->getLoopFor(*I) != L) - continue; - - // We now have a loop-invariant count of loop iterations (which is not the - // constant zero) for which we know that this loop will not exit via this - // existing block. - - // We need to make sure that this block will run on every loop iteration. - // For this to be true, we must dominate all blocks with backedges. Such - // blocks are in-loop predecessors to the header block. - bool NotAlways = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { - if (!L->contains(*PI)) - continue; - - if (!DT->dominates(*I, *PI)) { - NotAlways = true; - break; - } - } - - if (NotAlways) - continue; - - // Make sure this blocks ends with a conditional branch. - Instruction *TI = (*I)->getTerminator(); - if (!TI) - continue; - - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - if (!BI->isConditional()) - continue; - - CountedExitBranch = BI; - } else - continue; - - // Note that this block may not be the loop latch block, even if the loop - // has a latch block. - CountedExitBlock = *I; - ExitCount = EC; - break; - } - - if (!CountedExitBlock) - return MadeChange; - - BasicBlock *Preheader = L->getLoopPreheader(); - - // If we don't have a preheader, then insert one. If we already have a - // preheader, then we can use it (except if the preheader contains a use of - // the CTR register because some such uses might be reordered by the - // selection DAG after the mtctr instruction). - if (!Preheader || mightUseCTR(Preheader)) - Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - if (!Preheader) - return MadeChange; - - LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() - << "\n"); - - // Insert the count into the preheader and replace the condition used by the - // selected branch. - MadeChange = true; - - SCEVExpander SCEVE(*SE, *DL, "loopcnt"); - LLVMContext &C = SE->getContext(); - Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); - if (!ExitCount->getType()->isPointerTy() && - ExitCount->getType() != CountType) - ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); - ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); - Value *ECValue = - SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); - - IRBuilder<> CountBuilder(Preheader->getTerminator()); - Module *M = Preheader->getParent()->getParent(); - Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, - CountType); - CountBuilder.CreateCall(MTCTRFunc, ECValue); - - IRBuilder<> CondBuilder(CountedExitBranch); - Value *DecFunc = - Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); - Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); - Value *OldCond = CountedExitBranch->getCondition(); - CountedExitBranch->setCondition(NewCond); - - // The false branch must exit the loop. - if (!L->contains(CountedExitBranch->getSuccessor(0))) - CountedExitBranch->swapSuccessors(); - - // The old condition may be dead now, and may have even created a dead PHI - // (the original induction variable). - RecursivelyDeleteTriviallyDeadInstructions(OldCond); - // Run through the basic blocks of the loop and see if any of them have dead - // PHIs that can be removed. - for (auto I : L->blocks()) - DeleteDeadPHIs(I); - - ++NumCTRLoops; - return MadeChange; -} - #ifndef NDEBUG static bool clobbersCTR(const MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |