Diffstat (limited to 'lib/Transforms/Scalar/MemCpyOptimizer.cpp')
 -rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 100
 1 file changed, 60 insertions(+), 40 deletions(-)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3b74421a47a0..ced923d6973d 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -398,7 +398,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
   MemsetRanges Ranges(DL);
 
   BasicBlock::iterator BI(StartInst);
-  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+  for (++BI; !BI->isTerminator(); ++BI) {
     if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
       // If the instruction is readnone, ignore it, otherwise bail out.  We
       // don't even allow readonly here because we don't want something like:
@@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       if (!NextStore->isSimple()) break;
 
       // Check to see if this stored value is of the same byte-splattable value.
-      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+      Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+      if (isa<UndefValue>(ByteVal) && StoredByte)
+        ByteVal = StoredByte;
+      if (ByteVal != StoredByte)
         break;
 
       // Check to see if this store is to a constant offset from the start ptr.
@@ -543,8 +546,8 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
   // Memory locations of lifted instructions.
   SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
 
-  // Lifted callsites.
-  SmallVector<ImmutableCallSite, 8> CallSites;
+  // Lifted calls.
+  SmallVector<const CallBase *, 8> Calls;
 
   const MemoryLocation LoadLoc = MemoryLocation::get(LI);
 
@@ -562,10 +565,9 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
       });
 
       if (!NeedLift)
-        NeedLift =
-            llvm::any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
-              return isModOrRefSet(AA.getModRefInfo(C, CS));
-            });
+        NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) {
+          return isModOrRefSet(AA.getModRefInfo(C, Call));
+        });
     }
 
     if (!NeedLift)
@@ -576,12 +578,12 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
     // none of them may modify its source.
     if (isModSet(AA.getModRefInfo(C, LoadLoc)))
       return false;
-    else if (auto CS = ImmutableCallSite(C)) {
+    else if (const auto *Call = dyn_cast<CallBase>(C)) {
      // If we can't lift this before P, it's game over.
-      if (isModOrRefSet(AA.getModRefInfo(P, CS)))
+      if (isModOrRefSet(AA.getModRefInfo(P, Call)))
         return false;
 
-      CallSites.push_back(CS);
+      Calls.push_back(Call);
     } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
       // If we can't lift this before P, it's game over.
       auto ML = MemoryLocation::get(C);
@@ -672,13 +674,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
         if (UseMemMove)
           M = Builder.CreateMemMove(
               SI->getPointerOperand(), findStoreAlignment(DL, SI),
-              LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
-              SI->isVolatile());
+              LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
         else
           M = Builder.CreateMemCpy(
               SI->getPointerOperand(), findStoreAlignment(DL, SI),
-              LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
-              SI->isVolatile());
+              LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
 
         LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
                           << *M << "\n");
@@ -767,8 +767,8 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       if (!Align)
         Align = DL.getABITypeAlignment(T);
       IRBuilder<> Builder(SI);
-      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
-                                     Size, Align, SI->isVolatile());
+      auto *M =
+          Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align);
 
       LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
 
@@ -916,8 +916,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
         continue;
       }
       if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
-        if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
-            IT->getIntrinsicID() == Intrinsic::lifetime_end)
+        if (IT->isLifetimeStartOrEnd())
           continue;
 
       if (U != C && U != cpy)
@@ -942,10 +941,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
   AliasAnalysis &AA = LookupAliasAnalysis();
-  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
+  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
   // If necessary, perform additional analysis.
   if (isModOrRefSet(MR))
-    MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
+    MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT);
   if (isModOrRefSet(MR))
     return false;
 
@@ -993,8 +992,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
   // handled here, but combineMetadata doesn't support them yet
   unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                          LLVMContext::MD_noalias,
-                         LLVMContext::MD_invariant_group};
-  combineMetadata(C, cpy, KnownIDs);
+                         LLVMContext::MD_invariant_group,
+                         LLVMContext::MD_access_group};
+  combineMetadata(C, cpy, KnownIDs, true);
 
   // Remove the memcpy.
   MD->removeInstruction(cpy);
@@ -1056,6 +1056,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
     UseMemMove = true;
 
   // If all checks passed, then we can transform M.
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
+                    << *MDep << '\n' << *M << '\n');
 
   // TODO: Is this worth it if we're creating a less aligned memcpy? For
   // example we could be moving from movaps -> movq on x86.
@@ -1141,6 +1143,21 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
   return true;
 }
 
+/// Determine whether the instruction has undefined content for the given Size,
+/// either because it was freshly alloca'd or started its lifetime.
+static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+  if (isa<AllocaInst>(I))
+    return true;
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+      if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+        if (LTSize->getZExtValue() >= Size->getZExtValue())
+          return true;
+
+  return false;
+}
+
 /// Transform memcpy to memset when its source was just memset.
 /// In other words, turn:
 /// \code
@@ -1164,12 +1181,27 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
     return false;
 
-  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+  // A known memset size is required.
   ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+  if (!MemSetSize)
+    return false;
+
   // Make sure the memcpy doesn't read any more than what the memset wrote.
   // Don't worry about sizes larger than i64.
-  if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
-    return false;
+  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
+    // If the memcpy is larger than the memset, but the memory was undef prior
+    // to the memset, we can just ignore the tail. Technically we're only
+    // interested in the bytes from MemSetSize..CopySize here, but as we can't
+    // easily represent this location, we use the full 0..CopySize range.
+    MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+    MemDepResult DepInfo = MD->getPointerDependencyFrom(
+        MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+    if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+      CopySize = MemSetSize;
+    else
+      return false;
+  }
 
   IRBuilder<> Builder(MemCpy);
   Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
@@ -1249,19 +1281,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep);
   } else if (SrcDepInfo.isDef()) {
-    Instruction *I = SrcDepInfo.getInst();
-    bool hasUndefContents = false;
-
-    if (isa<AllocaInst>(I)) {
-      hasUndefContents = true;
-    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
-        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
-          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
-            hasUndefContents = true;
-    }
-
-    if (hasUndefContents) {
+    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
       MD->removeInstruction(M);
       M->eraseFromParent();
       ++NumMemCpyInstr;
@@ -1320,7 +1340,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
   Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
   uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
   MemDepResult DepInfo = MD->getPointerDependencyFrom(
-      MemoryLocation(ByValArg, ByValSize), true,
+      MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
      CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
   if (!DepInfo.isClobber())
     return false;
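To illustrate the new undef-tail handling in performMemCpyToMemSetOptzn, here is a minimal hypothetical IR sketch (the function name and values are invented, not taken from this change's tests): the memcpy reads 16 bytes but the preceding memset wrote only 8; because bytes 8..16 of the source have been undef ever since the alloca, the pass may now clamp the copy length to the memset size and rewrite the copy as a memset of the destination.

  declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

  ; Hypothetical example, not from the patch's test suite.
  define void @copy_from_partial_memset(i8* %dst) {
    %a = alloca [16 x i8]
    %src = getelementptr inbounds [16 x i8], [16 x i8]* %a, i64 0, i64 0
    call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 8, i1 false)
    ; The 16-byte copy below over-reads the 8-byte memset, but the tail is
    ; undef since the alloca, so MemCpyOpt may rewrite it as:
    ;   call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 8, i1 false)
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i1 false)
    ret void
  }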
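Likewise, a hedged sketch (again with invented names) of the case the extracted hasUndefContents helper covers in processMemCpy: a memcpy whose source is a freshly alloca'd or lifetime_start'd region with no intervening store copies only undef bytes, so the copy can be erased outright.

  declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

  ; Hypothetical example, not from the patch's test suite.
  define void @copy_of_undef(i8* %dst) {
    %a = alloca [32 x i8]
    %src = getelementptr inbounds [32 x i8], [32 x i8]* %a, i64 0, i64 0
    ; %a is never written, so the source dependency is a Def on the alloca,
    ; hasUndefContents returns true, and the memcpy below is deleted.
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 32, i1 false)
    ret void
  }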