Diffstat (limited to 'llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp')
-rw-r--r--  llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp  217
1 file changed, 179 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 3d75dd57456d..b4acb1b2ae90 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
@@ -18,7 +20,9 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                                      Value *DstAddr, ConstantInt *CopyLen,
                                      Align SrcAlign, Align DstAlign,
                                      bool SrcIsVolatile, bool DstIsVolatile,
-                                     const TargetTransformInfo &TTI) {
+                                     bool CanOverlap,
+                                     const TargetTransformInfo &TTI,
+                                     Optional<uint32_t> AtomicElementSize) {
   // No need to expand zero length copies.
   if (CopyLen->isZero())
     return;
@@ -28,15 +32,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
   Function *ParentFunc = PreLoopBB->getParent();
   LLVMContext &Ctx = PreLoopBB->getContext();
   const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+  MDBuilder MDB(Ctx);
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+  StringRef Name = "MemCopyAliasScope";
+  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
 
   unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
   unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
 
   Type *TypeOfCopyLen = CopyLen->getType();
   Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
-      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+      AtomicElementSize);
+  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+         "Atomic memcpy lowering is not supported for vector operand type");
 
   unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+         "Atomic memcpy lowering is not supported for selected operand size");
+
   uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
 
   if (LoopEndCount != 0) {
@@ -68,12 +82,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
     // Loop Body
     Value *SrcGEP =
         LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-    Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
-                                                PartSrcAlign, SrcIsVolatile);
+    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+                                                   PartSrcAlign, SrcIsVolatile);
+    if (!CanOverlap) {
+      // Set alias scope for loads.
+      Load->setMetadata(LLVMContext::MD_alias_scope,
+                        MDNode::get(Ctx, NewScope));
+    }
     Value *DstGEP =
         LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
-    LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+    StoreInst *Store = LoopBuilder.CreateAlignedStore(
+        Load, DstGEP, PartDstAlign, DstIsVolatile);
+    if (!CanOverlap) {
+      // Indicate that stores don't overlap loads.
+      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+    }
+    if (AtomicElementSize) {
+      Load->setAtomic(AtomicOrdering::Unordered);
+      Store->setAtomic(AtomicOrdering::Unordered);
+    }
     Value *NewIndex =
         LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
     LoopIndex->addIncoming(NewIndex, LoopBB);
@@ -93,7 +120,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
     SmallVector<Type *, 5> RemainingOps;
     TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                           SrcAS, DstAS, SrcAlign.value(),
-                                          DstAlign.value());
+                                          DstAlign.value(), AtomicElementSize);
 
     for (auto OpTy : RemainingOps) {
       Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
@@ -101,6 +128,10 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
       // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
+      assert(
+          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
+          "Atomic memcpy lowering is not supported for selected operand size");
+
       uint64_t GepIndex = BytesCopied / OperandSize;
       assert(GepIndex * OperandSize == BytesCopied &&
              "Division should have no Remainder!");
@@ -111,9 +142,13 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                              : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
       Value *SrcGEP = RBuilder.CreateInBoundsGEP(
           OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      Value *Load =
+      LoadInst *Load =
           RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
-
+      if (!CanOverlap) {
+        // Set alias scope for loads.
+        Load->setMetadata(LLVMContext::MD_alias_scope,
+                          MDNode::get(Ctx, NewScope));
+      }
       // Cast destination to operand type and store.
       PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
       Value *CastedDst = DstAddr->getType() == DstPtrType
@@ -121,8 +156,16 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                              : RBuilder.CreateBitCast(DstAddr, DstPtrType);
       Value *DstGEP = RBuilder.CreateInBoundsGEP(
           OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+                                                     DstIsVolatile);
+      if (!CanOverlap) {
+        // Indicate that stores don't overlap loads.
+        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+      }
+      if (AtomicElementSize) {
+        Load->setAtomic(AtomicOrdering::Unordered);
+        Store->setAtomic(AtomicOrdering::Unordered);
+      }
       BytesCopied += OperandSize;
     }
   }
@@ -134,8 +177,9 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
                                        Value *SrcAddr, Value *DstAddr,
                                        Value *CopyLen, Align SrcAlign,
                                        Align DstAlign, bool SrcIsVolatile,
-                                       bool DstIsVolatile,
-                                       const TargetTransformInfo &TTI) {
+                                       bool DstIsVolatile, bool CanOverlap,
+                                       const TargetTransformInfo &TTI,
+                                       Optional<uint32_t> AtomicElementSize) {
   BasicBlock *PreLoopBB = InsertBefore->getParent();
   BasicBlock *PostLoopBB =
       PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
@@ -143,12 +187,22 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   Function *ParentFunc = PreLoopBB->getParent();
   const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
   LLVMContext &Ctx = PreLoopBB->getContext();
+  MDBuilder MDB(Ctx);
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+  StringRef Name = "MemCopyAliasScope";
+  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+
   unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
   unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
 
   Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
-      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+      AtomicElementSize);
+  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+         "Atomic memcpy lowering is not supported for vector operand type");
   unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+         "Atomic memcpy lowering is not supported for selected operand size");
 
   IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
@@ -183,19 +237,40 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
 
   Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-  Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign,
-                                              SrcIsVolatile);
+  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+                                                 PartSrcAlign, SrcIsVolatile);
+  if (!CanOverlap) {
+    // Set alias scope for loads.
+    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
+  }
   Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
-  LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+  StoreInst *Store =
+      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
+  if (!CanOverlap) {
+    // Indicate that stores don't overlap loads.
+    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+  }
+  if (AtomicElementSize) {
+    Load->setAtomic(AtomicOrdering::Unordered);
+    Store->setAtomic(AtomicOrdering::Unordered);
+  }
   Value *NewIndex =
       LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
   LoopIndex->addIncoming(NewIndex, LoopBB);
 
-  if (!LoopOpIsInt8) {
-    // Add in the
-    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
-    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+  bool requiresResidual =
+      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
+  if (requiresResidual) {
+    Type *ResLoopOpType = AtomicElementSize
+                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
+                              : Int8Type;
+    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
+    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
+           "Store size is expected to match type size");
+
+    // Add in the
+    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
 
     // Loop body for the residual copy.
     BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
@@ -230,21 +305,34 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
         ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
     ResidualIndex->addIncoming(Zero, ResHeaderBB);
 
-    Value *SrcAsInt8 =
-        ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
-    Value *DstAsInt8 =
-        ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
+        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
+    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
+        DstAddr, PointerType::get(ResLoopOpType, DstAS));
     Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
-    Value *SrcGEP =
-        ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
-    Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign,
-                                               SrcIsVolatile);
-    Value *DstGEP =
-        ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
-    ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
-    Value *ResNewIndex =
-        ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
+        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
+    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
+                                                  PartSrcAlign, SrcIsVolatile);
+    if (!CanOverlap) {
+      // Set alias scope for loads.
+      Load->setMetadata(LLVMContext::MD_alias_scope,
+                        MDNode::get(Ctx, NewScope));
+    }
+    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
+        ResLoopOpType, DstAsResLoopOpType, FullOffset);
+    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+                                                     DstIsVolatile);
+    if (!CanOverlap) {
+      // Indicate that stores don't overlap loads.
+      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+    }
+    if (AtomicElementSize) {
+      Load->setAtomic(AtomicOrdering::Unordered);
+      Store->setAtomic(AtomicOrdering::Unordered);
+    }
+    Value *ResNewIndex = ResBuilder.CreateAdd(
+        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
     ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
 
     // Create the loop branch condition.
@@ -297,7 +385,13 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
   Function *F = OrigBB->getParent();
   const DataLayout &DL = F->getParent()->getDataLayout();
 
-  Type *EltTy = SrcAddr->getType()->getPointerElementType();
+  // TODO: Use different element type if possible?
+  IRBuilder<> CastBuilder(InsertBefore);
+  Type *EltTy = CastBuilder.getInt8Ty();
+  Type *PtrTy =
+      CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
+  SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
+  DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);
 
   // Create a comparison of src and dst, based on which we jump to either
   // the forward-copy part of the function (if src >= dst) or the backwards-copy
@@ -419,8 +513,21 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                                 NewBB);
 }
 
+template <typename T>
+static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
+  if (SE) {
+    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
+    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
+    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
+      return false;
+  }
+  return true;
+}
+
 void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
-                              const TargetTransformInfo &TTI) {
+                              const TargetTransformInfo &TTI,
+                              ScalarEvolution *SE) {
+  bool CanOverlap = canOverlap(Memcpy, SE);
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
     createMemCpyLoopKnownSize(
         /* InsertBefore */ Memcpy,
@@ -431,6 +538,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
+        /* CanOverlap */ CanOverlap,
         /* TargetTransformInfo */ TTI);
   } else {
     createMemCpyLoopUnknownSize(
@@ -442,6 +550,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
+        /* CanOverlap */ CanOverlap,
         /* TargetTransformInfo */ TTI);
   }
 }
@@ -465,3 +574,35 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
       /* Alignment */ Memset->getDestAlign().valueOrOne(),
       Memset->isVolatile());
 }
+
+void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
+                                    const TargetTransformInfo &TTI,
+                                    ScalarEvolution *SE) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
+    createMemCpyLoopKnownSize(
+        /* InsertBefore */ AtomicMemcpy,
+        /* SrcAddr */ AtomicMemcpy->getRawSource(),
+        /* DstAddr */ AtomicMemcpy->getRawDest(),
+        /* CopyLen */ CI,
+        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+        /* TargetTransformInfo */ TTI,
+        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+  } else {
+    createMemCpyLoopUnknownSize(
+        /* InsertBefore */ AtomicMemcpy,
+        /* SrcAddr */ AtomicMemcpy->getRawSource(),
+        /* DstAddr */ AtomicMemcpy->getRawDest(),
+        /* CopyLen */ AtomicMemcpy->getLength(),
+        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+        /* TargetTransformInfo */ TTI,
+        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+  }
+}
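
For context, a caller-side sketch of the entry points this commit changes. The snippet below is not part of the patch: the helper function and its name are hypothetical, while the expandMemCpyAsLoop and expandAtomicMemCpyAsLoop signatures are the ones introduced above. Passing a ScalarEvolution pointer is optional; when SCEV can prove the source and destination pointers are never equal, the expansion annotates its loads and stores with the alias-scope metadata added by this patch, and the element-atomic path always passes CanOverlap = false because the intrinsic forbids overlapping operands.

    // Hypothetical lowering helper (not from this commit); it only calls
    // APIs whose signatures appear in the diff above.
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

    using namespace llvm;

    static void lowerMemTransferIntrinsic(IntrinsicInst *II,
                                          const TargetTransformInfo &TTI,
                                          ScalarEvolution *SE) {
      if (auto *Memcpy = dyn_cast<MemCpyInst>(II)) {
        // SE may be null. When it can prove src != dst, the expanded loop's
        // loads and stores carry !alias.scope / !noalias metadata.
        expandMemCpyAsLoop(Memcpy, TTI, SE);
        Memcpy->eraseFromParent();
      } else if (auto *AtomicMemcpy = dyn_cast<AtomicMemCpyInst>(II)) {
        // Expands to a loop of unordered atomic loads/stores whose width is
        // a multiple of getElementSizeInBytes(); operands must not overlap.
        expandAtomicMemCpyAsLoop(AtomicMemcpy, TTI, SE);
        AtomicMemcpy->eraseFromParent();
      }
    }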