diff options
Diffstat (limited to 'llvm/lib/Transforms/IPO/ArgumentPromotion.cpp')
| -rw-r--r-- | llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 1121 |
1 file changed, 394 insertions, 727 deletions
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index e6a542385662..62cfc3294968 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,9 +29,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/ArgumentPromotion.h" + #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" @@ -40,15 +39,11 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -56,33 +51,26 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <cassert> #include <cstdint> -#include <functional> -#include <iterator> -#include <map> -#include <set> #include <utility> #include <vector> @@ -91,43 +79,81 @@ using namespace llvm; #define DEBUG_TYPE "argpromotion" STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); -STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); -STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); -/// A vector used to hold the indices of a single GEP instruction -using IndicesVector = std::vector<uint64_t>; +namespace { + +struct ArgPart { + Type *Ty; + Align Alignment; + /// A representative guaranteed-executed load or store instruction for use by + /// metadata transfer. + Instruction *MustExecInstr; +}; + +using OffsetAndArgPart = std::pair<int64_t, ArgPart>; + +} // end anonymous namespace + +static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, + Value *Ptr, Type *ResElemTy, int64_t Offset) { + // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise + // fall back to an i8 GEP to a specific offset. + unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); + APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset); + if (!Ptr->getType()->isOpaquePointerTy()) { + Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType(); + if (OrigOffset == 0 && OrigElemTy == ResElemTy) + return Ptr; + + if (OrigElemTy->isSized()) { + APInt TmpOffset = OrigOffset; + Type *TmpTy = OrigElemTy; + SmallVector<APInt> IntIndices = + DL.getGEPIndicesForOffset(TmpTy, TmpOffset); + if (TmpOffset == 0) { + // Try to add trailing zero indices to reach the right type. 
+ while (TmpTy != ResElemTy) { + Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0); + if (!NextTy) + break; + + IntIndices.push_back(APInt::getZero( + isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth())); + TmpTy = NextTy; + } + + SmallVector<Value *> Indices; + for (const APInt &Index : IntIndices) + Indices.push_back(IRB.getInt(Index)); + + if (OrigOffset != 0 || TmpTy == ResElemTy) { + Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices); + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); + } + } + } + } + + if (OrigOffset != 0) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace)); + Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset)); + } + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); +} /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * -doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, - SmallPtrSetImpl<Argument *> &ByValArgsToTransform, - Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> - ReplaceCallSite) { +doPromotion(Function *F, FunctionAnalysisManager &FAM, + const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> + &ArgsToPromote) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; - using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; - - // ScalarizedElements - If we are promoting a pointer that has elements - // accessed out of it, keep track of which elements are accessed so that we - // can add one argument for each. - // - // Arguments that are directly loaded will have a zero element value here, to - // handle cases where there are both a direct load and GEP accesses. 
- std::map<Argument *, ScalarizeTable> ScalarizedElements; - - // OriginalLoads - Keep track of a representative load instruction from the - // original function so that we can tell the alias analysis implementation - // what the new GEP/Load instructions we are inserting look like. - // We need to keep the original loads for each argument and the elements - // of the argument that are accessed. - std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; - // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost @@ -138,15 +164,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { - if (ByValArgsToTransform.count(&*I)) { - // Simple byval argument? Just add all the struct element types. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast<StructType>(AgTy); - llvm::append_range(Params, STy->elements()); - ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), - AttributeSet()); - ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(&*I)) { + if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo)); @@ -154,58 +172,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { - // Okay, this is being promoted. This means that the only uses are loads - // or GEPs which are only used by loads - - // In this table, we will track which indices are loaded from the argument - // (where direct loads are tracked as no indices). 
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - for (User *U : make_early_inc_range(I->users())) { - Instruction *UI = cast<Instruction>(U); - Type *SrcTy; - if (LoadInst *L = dyn_cast<LoadInst>(UI)) - SrcTy = L->getType(); - else - SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); - // Skip dead GEPs and remove them. - if (isa<GetElementPtrInst>(UI) && UI->use_empty()) { - UI->eraseFromParent(); - continue; - } - - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (const Use &I : llvm::drop_begin(UI->operands())) - Indices.push_back(cast<ConstantInt>(I)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); - ArgIndices.insert(std::make_pair(SrcTy, Indices)); - LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast<LoadInst>(UI)) - OrigLoad = L; - else - // Take any load, we will use it only to update Alias Analysis - OrigLoad = cast<LoadInst>(UI->user_back()); - OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; - } - - // Add a parameter to the function for each element passed in. 
- for (const auto &ArgIndex : ArgIndices) { - // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType( - I->getType()->getPointerElementType(), ArgIndex.second)); + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + Params.push_back(Pair.second.Ty); ArgAttrVec.push_back(AttributeSet()); - assert(Params.back()); } - - if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) - ++NumArgumentsPromoted; - else - ++NumAggregatesPromoted; + ++NumArgumentsPromoted; } } @@ -222,24 +194,30 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // The new function will have the !dbg metadata copied from the original // function. The original function may not be deleted, and dbg metadata need - // to be unique so we need to drop it. + // to be unique, so we need to drop it. F->setSubprogram(nullptr); LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); + uint64_t LargestVectorWidth = 0; + for (auto *I : Params) + if (auto *VT = dyn_cast<llvm::VectorType>(I)) + LargestVectorWidth = std::max( + LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); + // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrVec)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Loop over all of the callers of the function, transforming the call sites - // to pass in the loaded pointers. - // + // Loop over all the callers of the function, transforming the call sites to + // pass in the loaded pointers. 
SmallVector<Value *, 16> Args; const DataLayout &DL = F->getParent()->getDataLayout(); while (!F->use_empty()) { @@ -250,74 +228,34 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the operands, inserting GEP and loads in the caller as // appropriate. - auto AI = CB.arg_begin(); + auto *AI = CB.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgNo) - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + ++I, ++AI, ++ArgNo) { + if (!ArgsToPromote.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); - } else if (ByValArgsToTransform.count(&*I)) { - // Emit a GEP and load for each element of the struct. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - Align StructAlign = *I->getParamAlign(); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - auto *Idx = - IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i)); - // TODO: Tell AA about the new values? - Align Alignment = - commonAlignment(StructAlign, SL->getElementOffset(i)); - Args.push_back(IRB.CreateAlignedLoad( - STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val")); - ArgAttrVec.push_back(AttributeSet()); - } } else if (!I->use_empty()) { - // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - // Store the Value* version of the indices in here, but declare it now - // for reuse. 
- std::vector<Value *> Ops; - for (const auto &ArgIndex : ArgIndices) { - Value *V = *AI; - LoadInst *OrigLoad = - OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; - if (!ArgIndex.second.empty()) { - Ops.reserve(ArgIndex.second.size()); - Type *ElTy = V->getType(); - for (auto II : ArgIndex.second) { - // Use i32 to index structs, and i64 for others (pointers/arrays). - // This satisfies GEP constraints. - Type *IdxTy = - (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) - : Type::getInt64Ty(F->getContext())); - Ops.push_back(ConstantInt::get(IdxTy, II)); - // Keep track of the type we're currently indexing. - if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) - ElTy = ElPTy->getPointerElementType(); - else - ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II); - } - // And create a GEP to extract those indices. - V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); - Ops.clear(); + Value *V = *AI; + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + LoadInst *LI = IRB.CreateAlignedLoad( + Pair.second.Ty, + createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first), + Pair.second.Alignment, V->getName() + ".val"); + if (Pair.second.MustExecInstr) { + LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata()); + LI->copyMetadata(*Pair.second.MustExecInstr, + {LLVMContext::MD_range, LLVMContext::MD_nonnull, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_align, LLVMContext::MD_noundef}); } - // Since we're replacing a load make sure we take the alignment - // of the previous load. - LoadInst *newLoad = - IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlign()); - // Transfer the AA info too. - newLoad->setAAMetadata(OrigLoad->getAAMetadata()); - - Args.push_back(newLoad); + Args.push_back(LI); ArgAttrVec.push_back(AttributeSet()); } } + } // Push any varargs arguments on the list. 
for (; AI != CB.arg_end(); ++AI, ++ArgNo) { @@ -345,9 +283,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, Args.clear(); ArgAttrVec.clear(); - // Update the callgraph to know that the callsite has been transformed. - if (ReplaceCallSite) - (*ReplaceCallSite)(CB, *NewCS); + AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(), + LargestVectorWidth); if (!CB.use_empty()) { CB.replaceAllUsesWith(NewCS); @@ -364,11 +301,15 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + // We will collect all the new created allocas to promote them into registers + // after the following loop + SmallVector<AllocaInst *, 4> Allocas; + // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. Function::arg_iterator I2 = NF->arg_begin(); for (Argument &Arg : F->args()) { - if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) { + if (!ArgsToPromote.count(&Arg)) { // If this is an unmodified argument, move the name and users over to the // new version. Arg.replaceAllUsesWith(&*I2); @@ -377,37 +318,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, continue; } - if (ByValArgsToTransform.count(&Arg)) { - // In the callee, we create an alloca, and store each of the new incoming - // arguments into the alloca. - Instruction *InsertPt = &NF->begin()->front(); - - // Just add all the struct element types. 
- Type *AgTy = Arg.getParamByValType(); - Align StructAlign = *Arg.getParamAlign(); - Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, - StructAlign, "", InsertPt); - StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), - nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create( - AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), - InsertPt); - I2->setName(Arg.getName() + "." + Twine(i)); - Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i)); - new StoreInst(&*I2++, Idx, false, Alignment, InsertPt); - } - - // Anything that used the arg should now use the alloca. - Arg.replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(&Arg); - continue; - } - // There potentially are metadata uses for things like llvm.dbg.value. // Replace them with undef, after handling the other regular uses. auto RauwUndefMetadata = make_scope_exit( @@ -416,57 +326,95 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, if (Arg.use_empty()) continue; - // Otherwise, if we promoted this argument, then all users are load - // instructions (or GEPs with only load users), and all loads should be - // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[&Arg]; + // Otherwise, if we promoted this argument, we have to create an alloca in + // the callee for every promotable part and store each of the new incoming + // arguments into the corresponding alloca, what lets the old code (the + // store instructions if they are allowed especially) a chance to work as + // before. 
+ assert(Arg.getType()->isPointerTy() && + "Only arguments with a pointer type are promotable"); - while (!Arg.use_empty()) { - if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) { - assert(ArgIndices.begin()->second.empty() && - "Load element should sort to front!"); - I2->setName(Arg.getName() + ".val"); - LI->replaceAllUsesWith(&*I2); - LI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName() - << "' in function '" << F->getName() << "'\n"); - } else { - GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back()); - assert(!GEP->use_empty() && - "GEPs without uses should be cleaned up already"); - IndicesVector Operands; - Operands.reserve(GEP->getNumIndices()); - for (const Use &Idx : GEP->indices()) - Operands.push_back(cast<ConstantInt>(Idx)->getSExtValue()); + IRBuilder<NoFolder> IRB(&NF->begin()->front()); - // GEPs with a single 0 index can be merged with direct loads - if (Operands.size() == 1 && Operands.front() == 0) - Operands.clear(); + // Add only the promoted elements, so parts from ArgsToPromote + SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca; + for (const auto &Pair : ArgsToPromote.find(&Arg)->second) { + int64_t Offset = Pair.first; + const ArgPart &Part = Pair.second; - Function::arg_iterator TheArg = I2; - for (ScalarizeTable::iterator It = ArgIndices.begin(); - It->second != Operands; ++It, ++TheArg) { - assert(It != ArgIndices.end() && "GEP not handled??"); - } + Argument *NewArg = I2++; + NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val"); + + AllocaInst *NewAlloca = IRB.CreateAlloca( + Part.Ty, nullptr, Arg.getName() + "." 
+ Twine(Offset) + ".allc"); + NewAlloca->setAlignment(Pair.second.Alignment); + IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment); - TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(), - make_range(Operands.begin(), Operands.end()))); + // Collect the alloca to retarget the users to + OffsetToAlloca.insert({Offset, NewAlloca}); + } - LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"); + auto GetAlloca = [&](Value *Ptr) { + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + assert(Ptr == &Arg && "Not constant offset from arg?"); + return OffsetToAlloca.lookup(Offset.getSExtValue()); + }; - // All of the uses must be load instructions. Replace them all with - // the argument specified by ArgNo. - while (!GEP->use_empty()) { - LoadInst *L = cast<LoadInst>(GEP->user_back()); - L->replaceAllUsesWith(&*TheArg); - L->eraseFromParent(); - } - GEP->eraseFromParent(); + // Cleanup the code from the dead instructions: GEPs and BitCasts in between + // the original argument and its users: loads and stores. Retarget every + // user to the new created alloca. 
+ SmallVector<Value *, 16> Worklist; + SmallVector<Instruction *, 16> DeadInsts; + append_range(Worklist, Arg.users()); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) { + DeadInsts.push_back(cast<Instruction>(V)); + append_range(Worklist, V->users()); + continue; + } + + if (auto *LI = dyn_cast<LoadInst>(V)) { + Value *Ptr = LI->getPointerOperand(); + LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr)); + continue; + } + + if (auto *SI = dyn_cast<StoreInst>(V)) { + assert(!SI->isVolatile() && "Volatile operations can't be promoted."); + Value *Ptr = SI->getPointerOperand(); + SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr)); + continue; } + + llvm_unreachable("Unexpected user"); + } + + for (Instruction *I : DeadInsts) { + I->replaceAllUsesWith(PoisonValue::get(I->getType())); + I->eraseFromParent(); + } + + // Collect the allocas for promotion + for (const auto &Pair : OffsetToAlloca) { + assert(isAllocaPromotable(Pair.second) && + "By design, only promotable allocas should be produced."); + Allocas.push_back(Pair.second); } - // Increment I2 past all of the arguments added for this promoted pointer. - std::advance(I2, ArgIndices.size()); + } + + LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size() + << " alloca(s) are promotable by Mem2Reg\n"); + + if (!Allocas.empty()) { + // And we are able to call the `promoteMemoryToRegister()` function. + // Our earlier checks have ensured that PromoteMemToReg() will + // succeed. + auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF); + auto &AC = FAM.getResult<AssumptionAnalysis>(*NF); + PromoteMemToReg(Allocas, DT, &AC); } return NF; @@ -474,100 +422,37 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, /// Return true if we can prove that all callees pass in a valid pointer for the /// specified function argument. 
-static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) { +static bool allCallersPassValidPointerForArgument(Argument *Arg, + Align NeededAlign, + uint64_t NeededDerefBytes) { Function *Callee = Arg->getParent(); const DataLayout &DL = Callee->getParent()->getDataLayout(); + APInt Bytes(64, NeededDerefBytes); - unsigned ArgNo = Arg->getArgNo(); + // Check if the argument itself is marked dereferenceable and aligned. + if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL)) + return true; // Look at all call sites of the function. At this point we know we only have // direct callees. - for (User *U : Callee->users()) { + return all_of(Callee->users(), [&](User *U) { CallBase &CB = cast<CallBase>(*U); - - if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL)) - return false; - } - return true; -} - -/// Returns true if Prefix is a prefix of longer. That means, Longer has a size -/// that is greater than or equal to the size of prefix, and each of the -/// elements in Prefix is the same as the corresponding elements in Longer. -/// -/// This means it also returns true when Prefix and Longer are equal! -static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { - if (Prefix.size() > Longer.size()) - return false; - return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); -} - -/// Checks if Indices, or a prefix of Indices, is in Set. -static bool prefixIn(const IndicesVector &Indices, - std::set<IndicesVector> &Set) { - std::set<IndicesVector>::iterator Low; - Low = Set.upper_bound(Indices); - if (Low != Set.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This means - // it points to a prefix of Indices (possibly Indices itself), if such - // prefix exists. - // - // This load is safe if any prefix of its operands is safe to load. 
- return Low != Set.end() && isPrefix(*Low, Indices); + return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()), + NeededAlign, Bytes, DL); + }); } -/// Mark the given indices (ToMark) as safe in the given set of indices -/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there -/// is already a prefix of Indices in Safe, Indices are implicitely marked safe -/// already. Furthermore, any indices that Indices is itself a prefix of, are -/// removed from Safe (since they are implicitely safe because of Indices now). -static void markIndicesSafe(const IndicesVector &ToMark, - std::set<IndicesVector> &Safe) { - std::set<IndicesVector>::iterator Low; - Low = Safe.upper_bound(ToMark); - // Guard against the case where Safe is empty - if (Low != Safe.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This - // means it points to a prefix of Indices (possibly Indices itself), if - // such prefix exists. - if (Low != Safe.end()) { - if (isPrefix(*Low, ToMark)) - // If there is already a prefix of these indices (or exactly these - // indices) marked a safe, don't bother adding these indices - return; - - // Increment Low, so we can use it as a "insert before" hint - ++Low; - } - // Insert - Low = Safe.insert(Low, ToMark); - ++Low; - // If there we're a prefix of longer index list(s), remove those - std::set<IndicesVector>::iterator End = Safe.end(); - while (Low != End && isPrefix(ToMark, *Low)) { - std::set<IndicesVector>::iterator Remove = Low; - ++Low; - Safe.erase(Remove); - } -} - -/// isSafeToPromoteArgument - As you might guess from the name of this method, -/// it checks to see if it is both safe and useful to promote the argument. -/// This method limits promotion of aggregates to only promote up to three -/// elements of the aggregate in order to avoid exploding the number of -/// arguments passed in. 
-static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR, - unsigned MaxElements) { - using GEPIndicesSet = std::set<IndicesVector>; - +/// Determine that this argument is safe to promote, and find the argument +/// parts it can be promoted into. +static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, + unsigned MaxElements, bool IsRecursive, + SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) { // Quick exit for unused arguments if (Arg->use_empty()) return true; - // We can only promote this argument if all of the uses are loads, or are GEP - // instructions (with constant indices) that are subsequently loaded. + // We can only promote this argument if all the uses are loads at known + // offsets. // // Promoting the argument causes it to be loaded in the caller // unconditionally. This is only safe if we can prove that either the load @@ -578,157 +463,193 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR // anyway, in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. - // - // This set will contain all sets of indices that are loaded in the entry - // block, and thus are safe to unconditionally load in the caller. - GEPIndicesSet SafeToUnconditionallyLoad; - // This set contains all the sets of indices that we are planning to promote. - // This makes it possible to limit the number of arguments added. - GEPIndicesSet ToPromote; + SmallDenseMap<int64_t, ArgPart, 4> ArgParts; + Align NeededAlign(1); + uint64_t NeededDerefBytes = 0; - // If the pointer is always valid, any load with first index 0 is valid. + // And if this is a byval argument we also allow to have store instructions. + // Only handle in such way arguments with specified alignment; + // if it's unspecified, the actual alignment of the argument is + // target-specific. 
+ bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); - if (ByValTy) - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + // An end user of a pointer argument is a load or store instruction. + // Returns None if this load or store is not based on the argument. Return + // true if we can promote the instruction, false otherwise. + auto HandleEndUser = [&](auto *I, Type *Ty, + bool GuaranteedToExecute) -> Optional<bool> { + // Don't promote volatile or atomic instructions. + if (!I->isSimple()) + return false; - // Whenever a new underlying type for the operand is found, make sure it's - // consistent with the GEPs and loads we've already seen and, if necessary, - // use it to see if all incoming pointers are valid (which implies the 0-index - // is safe). - Type *BaseTy = ByValTy; - auto UpdateBaseTy = [&](Type *NewBaseTy) { - if (BaseTy) - return BaseTy == NewBaseTy; + Value *Ptr = I->getPointerOperand(); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + if (Ptr != Arg) + return None; - BaseTy = NewBaseTy; - if (allCallersPassValidPointerForArgument(Arg, BaseTy)) { - assert(SafeToUnconditionallyLoad.empty()); - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); - } + if (Offset.getSignificantBits() >= 64) + return false; - return true; - }; + TypeSize Size = DL.getTypeStoreSize(Ty); + // Don't try to promote scalable types. + if (Size.isScalable()) + return false; - // First, iterate functions that are guaranteed to execution on function - // entry and mark loads of (geps of) arguments as safe. 
- BasicBlock &EntryBlock = Arg->getParent()->front(); - // Declare this here so we can reuse it - IndicesVector Indices; - for (Instruction &I : EntryBlock) { - if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - Value *V = LI->getPointerOperand(); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { - V = GEP->getPointerOperand(); - if (V == Arg) { - // This load actually loads (part of) Arg? Check the indices then. - Indices.reserve(GEP->getNumIndices()); - for (Use &Idx : GEP->indices()) - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) - Indices.push_back(CI->getSExtValue()); - else - // We found a non-constant GEP index for this argument? Bail out - // right away, can't promote this argument at all. - return false; + // If this is a recursive function and one of the types is a pointer, + // then promoting it might lead to recursive promotion. + if (IsRecursive && Ty->isPointerTy()) + return false; - if (!UpdateBaseTy(GEP->getSourceElementType())) - return false; + int64_t Off = Offset.getSExtValue(); + auto Pair = ArgParts.try_emplace( + Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr}); + ArgPart &Part = Pair.first->second; + bool OffsetNotSeenBefore = Pair.second; - // Indices checked out, mark them as safe - markIndicesSafe(Indices, SafeToUnconditionallyLoad); - Indices.clear(); - } - } else if (V == Arg) { - // Direct loads are equivalent to a GEP with a single 0 index. - markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + // We limit promotion to only promoting up to a fixed number of elements of + // the aggregate. + if (MaxElements > 0 && ArgParts.size() > MaxElements) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "more than " << MaxElements << " parts\n"); + return false; + } - if (BaseTy && LI->getType() != BaseTy) - return false; + // For now, we only support loading/storing one specific type at a given + // offset. 
+ if (Part.Ty != Ty) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "accessed as both " << *Part.Ty << " and " << *Ty + << " at offset " << Off << "\n"); + return false; + } - BaseTy = LI->getType(); - } + // If this instruction is not guaranteed to execute, and we haven't seen a + // load or store at this offset before (or it had lower alignment), then we + // need to remember that requirement. + // Note that skipping instructions of previously seen offsets is only + // correct because we only allow a single type for a given offset, which + // also means that the number of accessed bytes will be the same. + if (!GuaranteedToExecute && + (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { + // We won't be able to prove dereferenceability for negative offsets. + if (Off < 0) + return false; + + // If the offset is not aligned, an aligned base pointer won't help. + if (!isAligned(I->getAlign(), Off)) + return false; + + NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue()); + NeededAlign = std::max(NeededAlign, I->getAlign()); } + Part.Alignment = std::max(Part.Alignment, I->getAlign()); + return true; + }; + + // Look for loads and stores that are guaranteed to execute on entry. + for (Instruction &I : Arg->getParent()->getEntryBlock()) { + Optional<bool> Res{}; + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) + Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true); + else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + Res = HandleEndUser(SI, SI->getValueOperand()->getType(), + /* GuaranteedToExecute */ true); + if (Res && !*Res) + return false; + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) break; } - // Now, iterate all uses of the argument to see if there are any uses that are - // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + // Now look at all loads of the argument. Remember the load instructions + // for the aliasing check below. 
+ SmallVector<const Use *, 16> Worklist; + SmallPtrSet<const Use *, 16> Visited; SmallVector<LoadInst *, 16> Loads; - IndicesVector Operands; - for (Use &U : Arg->uses()) { - User *UR = U.getUser(); - Operands.clear(); - if (LoadInst *LI = dyn_cast<LoadInst>(UR)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - // Direct loads are equivalent to a GEP with a zero index and then a load. - Operands.push_back(0); + auto AppendUses = [&](const Value *V) { + for (const Use &U : V->uses()) + if (Visited.insert(&U).second) + Worklist.push_back(&U); + }; + AppendUses(Arg); + while (!Worklist.empty()) { + const Use *U = Worklist.pop_back_val(); + Value *V = U->getUser(); + if (isa<BitCastInst>(V)) { + AppendUses(V); + continue; + } - if (!UpdateBaseTy(LI->getType())) + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + if (!GEP->hasAllConstantIndices()) return false; - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) { - if (GEP->use_empty()) { - // Dead GEP's cause trouble later. Just remove them if we run into - // them. - continue; - } + AppendUses(V); + continue; + } - if (!UpdateBaseTy(GEP->getSourceElementType())) + if (auto *LI = dyn_cast<LoadInst>(V)) { + if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) return false; + Loads.push_back(LI); + continue; + } - // Ensure that all of the indices are constants. - for (Use &Idx : GEP->indices()) - if (ConstantInt *C = dyn_cast<ConstantInt>(Idx)) - Operands.push_back(C->getSExtValue()); - else - return false; // Not a constant operand GEP! - - // Ensure that the only users of the GEP are load instructions. - for (User *GEPU : GEP->users()) - if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - } else { - // Other uses than load? - return false; - } - } else { - return false; // Not a load or a GEP. 
+ // Stores are allowed for byval arguments + auto *SI = dyn_cast<StoreInst>(V); + if (AreStoresAllowed && SI && + U->getOperandNo() == StoreInst::getPointerOperandIndex()) { + if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), + /* GuaranteedToExecute */ false)) + return false; + continue; + // Only stores TO the argument is allowed, all the other stores are + // unknown users } - // Now, see if it is safe to promote this load / loads of this GEP. Loading - // is safe if Operands, or a prefix of Operands, is marked as safe. - if (!prefixIn(Operands, SafeToUnconditionallyLoad)) - return false; + // Unknown user. + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "unknown user " << *V << "\n"); + return false; + } - // See if we are already promoting a load with these indices. If not, check - // to make sure that we aren't promoting too many elements. If so, nothing - // to do. - if (ToPromote.find(Operands) == ToPromote.end()) { - if (MaxElements > 0 && ToPromote.size() == MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '" - << Arg->getName() - << "' because it would require adding more " - << "than " << MaxElements - << " arguments to the function.\n"); - // We limit aggregate promotion to only promoting up to a fixed number - // of elements of the aggregate. - return false; - } - ToPromote.insert(std::move(Operands)); + if (NeededDerefBytes || NeededAlign > 1) { + // Try to prove a required deref / aligned requirement. + if (!allCallersPassValidPointerForArgument(Arg, NeededAlign, + NeededDerefBytes)) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "not dereferenceable or aligned\n"); + return false; } } - if (Loads.empty()) + if (ArgParts.empty()) return true; // No users, this is a dead argument. - // Okay, now we know that the argument is only used by load instructions and + // Sort parts by offset. 
+ append_range(ArgPartsVec, ArgParts); + sort(ArgPartsVec, + [](const auto &A, const auto &B) { return A.first < B.first; }); + + // Make sure the parts are non-overlapping. + int64_t Offset = ArgPartsVec[0].first; + for (const auto &Pair : ArgPartsVec) { + if (Pair.first < Offset) + return false; // Overlap with previous part. + + Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty); + } + + // If store instructions are allowed, the path from the entry of the function + // to each load may be not free of instructions that potentially invalidate + // the load, and this is an admissible situation. + if (AreStoresAllowed) + return true; + + // Okay, now we know that the argument is only used by load instructions, and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. @@ -762,118 +683,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR return true; } -bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) { - // There is no size information, so be conservative. - if (!type->isSized()) - return false; - - // If the alloc size is not equal to the storage size, then there are padding - // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) - return false; - - // FIXME: This isn't the right way to check for padding in vectors with - // non-byte-size elements. - if (VectorType *seqTy = dyn_cast<VectorType>(type)) - return isDenselyPacked(seqTy->getElementType(), DL); - - // For array types, check for padding within members. - if (ArrayType *seqTy = dyn_cast<ArrayType>(type)) - return isDenselyPacked(seqTy->getElementType(), DL); - - if (!isa<StructType>(type)) - return true; - - // Check for padding within and between elements of a struct. 
- StructType *StructTy = cast<StructType>(type); - const StructLayout *Layout = DL.getStructLayout(StructTy); - uint64_t StartPos = 0; - for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { - Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy, DL)) - return false; - if (StartPos != Layout->getElementOffsetInBits(i)) - return false; - StartPos += DL.getTypeAllocSizeInBits(ElTy); - } - - return true; -} - -/// Checks if the padding bytes of an argument could be accessed. -static bool canPaddingBeAccessed(Argument *arg) { - assert(arg->hasByValAttr()); - - // Track all the pointers to the argument to make sure they are not captured. - SmallPtrSet<Value *, 16> PtrValues; - PtrValues.insert(arg); - - // Track all of the stores. - SmallVector<StoreInst *, 16> Stores; - - // Scan through the uses recursively to make sure the pointer is always used - // sanely. - SmallVector<Value *, 16> WorkList(arg->users()); - while (!WorkList.empty()) { - Value *V = WorkList.pop_back_val(); - if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) { - if (PtrValues.insert(V).second) - llvm::append_range(WorkList, V->users()); - } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { - Stores.push_back(Store); - } else if (!isa<LoadInst>(V)) { - return true; - } - } - - // Check to make sure the pointers aren't captured - for (StoreInst *Store : Stores) - if (PtrValues.count(Store->getValueOperand())) - return true; - - return false; -} - -/// Check if callers and the callee \p F agree how promoted arguments would be -/// passed. The ones that they do not agree on are eliminated from the sets but -/// the return value has to be observed as well. -static bool areFunctionArgsABICompatible( - const Function &F, const TargetTransformInfo &TTI, - SmallPtrSetImpl<Argument *> &ArgsToPromote, - SmallPtrSetImpl<Argument *> &ByValArgsToTransform) { - // TODO: Check individual arguments so we can promote a subset? 
- SmallVector<Type *, 32> Types; - for (Argument *Arg : ArgsToPromote) - Types.push_back(Arg->getType()->getPointerElementType()); - for (Argument *Arg : ByValArgsToTransform) - Types.push_back(Arg->getParamByValType()); - - for (const Use &U : F.uses()) { +/// Check if callers and callee agree on how promoted arguments would be +/// passed. +static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, + const TargetTransformInfo &TTI) { + return all_of(F.uses(), [&](const Use &U) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); if (!CB) return false; + const Function *Caller = CB->getCaller(); const Function *Callee = CB->getCalledFunction(); - if (!TTI.areTypesABICompatible(Caller, Callee, Types)) - return false; - } - return true; + return TTI.areTypesABICompatible(Caller, Callee, Types); + }); } /// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. -static Function * -promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, - unsigned MaxElements, - Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> - ReplaceCallSite, - const TargetTransformInfo &TTI) { +static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, + unsigned MaxElements, bool IsRecursive) { // Don't perform argument promotion for naked functions; otherwise we can end // up removing parameters that are seemingly 'not used' as they are referred // to in the assembly. - if(F->hasFnAttribute(Attribute::Naked)) + if (F->hasFnAttribute(Attribute::Naked)) return nullptr; // Make sure that it is local to this module. @@ -903,20 +737,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // Second check: make sure that all callers are direct callers. 
We can't // transform functions that have indirect callers. Also see if the function - // is self-recursive and check that target features are compatible. - bool isSelfRecursive = false; + // is self-recursive. for (Use &U : F->uses()) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); // Must be a direct call. - if (CB == nullptr || !CB->isCallee(&U)) + if (CB == nullptr || !CB->isCallee(&U) || + CB->getFunctionType() != F->getFunctionType()) return nullptr; // Can't change signature of musttail callee if (CB->isMustTailCall()) return nullptr; - if (CB->getParent()->getParent() == F) - isSelfRecursive = true; + if (CB->getFunction() == F) + IsRecursive = true; } // Can't change signature of musttail caller @@ -926,16 +760,13 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, return nullptr; const DataLayout &DL = F->getParent()->getDataLayout(); - - AAResults &AAR = AARGetter(*F); + auto &AAR = FAM.getResult<AAManager>(*F); + const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. - SmallPtrSet<Argument *, 8> ArgsToPromote; - SmallPtrSet<Argument *, 8> ByValArgsToTransform; + DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; for (Argument *PtrArg : PointerArgs) { - Type *AgTy = PtrArg->getType()->getPointerElementType(); - // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { @@ -949,72 +780,25 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, } } - // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe, if the passed value is densely - // packed or if we can prove the padding bytes are never accessed. 
- // - // Only handle arguments with specified alignment; if it's unspecified, the - // actual alignment of the argument is target-specific. - bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() && - (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || - !canPaddingBeAccessed(PtrArg)); - if (isSafeToPromote) { - if (StructType *STy = dyn_cast<StructType>(AgTy)) { - if (MaxElements > 0 && STy->getNumElements() > MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '" - << PtrArg->getName() - << "' because it would require adding more" - << " than " << MaxElements - << " arguments to the function.\n"); - continue; - } + // If we can promote the pointer to its value. + SmallVector<OffsetAndArgPart, 4> ArgParts; - // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (const auto *EltTy : STy->elements()) { - if (!EltTy->isSingleValueType()) { - AllSimple = false; - break; - } - } - - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow sroa to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; - } - } - } + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + SmallVector<Type *, 4> Types; + for (const auto &Pair : ArgParts) + Types.push_back(Pair.second.Ty); - // If the argument is a recursive type and we're in a recursive - // function, we could end up infinitely peeling the function argument. - if (isSelfRecursive) { - if (StructType *STy = dyn_cast<StructType>(AgTy)) { - bool RecursiveType = - llvm::is_contained(STy->elements(), PtrArg->getType()); - if (RecursiveType) - continue; + if (areTypesABICompatible(Types, *F, TTI)) { + ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); } } - - // Otherwise, see if we can promote the pointer to its value. - Type *ByValTy = - PtrArg->hasByValAttr() ? 
PtrArg->getParamByValType() : nullptr; - if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements)) - ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + if (ArgsToPromote.empty()) return nullptr; - if (!areFunctionArgsABICompatible( - *F, TTI, ArgsToPromote, ByValArgsToTransform)) - return nullptr; - - return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); + return doPromotion(F, FAM, ArgsToPromote); } PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, @@ -1030,19 +814,10 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + bool IsRecursive = C.size() > 1; for (LazyCallGraph::Node &N : C) { Function &OldF = N.getFunction(); - - // FIXME: This lambda must only be used with this function. We should - // skip the lambda and just get the AA results directly. - auto AARGetter = [&](Function &F) -> AAResults & { - assert(&F == &OldF && "Called with an unexpected function!"); - return FAM.getResult<AAManager>(F); - }; - - const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF); - Function *NewF = - promoteArguments(&OldF, AARGetter, MaxElements, None, TTI); + Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive); if (!NewF) continue; LocalChange = true; @@ -1077,111 +852,3 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, PA.preserveSet<AllAnalysesOn<Function>>(); return PA; } - -namespace { - -/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
-struct ArgPromotion : public CallGraphSCCPass { - // Pass identification, replacement for typeid - static char ID; - - explicit ArgPromotion(unsigned MaxElements = 3) - : CallGraphSCCPass(ID), MaxElements(MaxElements) { - initializeArgPromotionPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - getAAResultsAnalysisUsage(AU); - CallGraphSCCPass::getAnalysisUsage(AU); - } - - bool runOnSCC(CallGraphSCC &SCC) override; - -private: - using llvm::Pass::doInitialization; - - bool doInitialization(CallGraph &CG) override; - - /// The maximum number of elements to expand, or 0 for unlimited. - unsigned MaxElements; -}; - -} // end anonymous namespace - -char ArgPromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, - false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false) - -Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) { - return new ArgPromotion(MaxElements); -} - -bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { - if (skipSCC(SCC)) - return false; - - // Get the callgraph information that we need to update to reflect our - // changes. - CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); - - LegacyAARGetter AARGetter(*this); - - bool Changed = false, LocalChange; - - // Iterate until we stop promoting from this SCC. - do { - LocalChange = false; - // Attempt to promote arguments from all functions in this SCC. 
- for (CallGraphNode *OldNode : SCC) { - Function *OldF = OldNode->getFunction(); - if (!OldF) - continue; - - auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) { - Function *Caller = OldCS.getParent()->getParent(); - CallGraphNode *NewCalleeNode = - CG.getOrInsertFunction(NewCS.getCalledFunction()); - CallGraphNode *CallerNode = CG[Caller]; - CallerNode->replaceCallEdge(cast<CallBase>(OldCS), - cast<CallBase>(NewCS), NewCalleeNode); - }; - - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF); - if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements, - {ReplaceCallSite}, TTI)) { - LocalChange = true; - - // Update the call graph for the newly promoted function. - CallGraphNode *NewNode = CG.getOrInsertFunction(NewF); - NewNode->stealCalledFunctionsFrom(OldNode); - if (OldNode->getNumReferences() == 0) - delete CG.removeFunctionFromModule(OldNode); - else - OldF->setLinkage(Function::ExternalLinkage); - - // And updat ethe SCC we're iterating as well. - SCC.ReplaceNode(OldNode, NewNode); - } - } - // Remember that we changed something. - Changed |= LocalChange; - } while (LocalChange); - - return Changed; -} - -bool ArgPromotion::doInitialization(CallGraph &CG) { - return CallGraphSCCPass::doInitialization(CG); -} |
