aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp')
-rw-r--r-- llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp | 217
1 files changed, 179 insertions, 38 deletions
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 3d75dd57456d..b4acb1b2ae90 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -18,7 +20,9 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, ConstantInt *CopyLen,
Align SrcAlign, Align DstAlign,
bool SrcIsVolatile, bool DstIsVolatile,
- const TargetTransformInfo &TTI) {
+ bool CanOverlap,
+ const TargetTransformInfo &TTI,
+ Optional<uint32_t> AtomicElementSize) {
// No need to expand zero length copies.
if (CopyLen->isZero())
return;
@@ -28,15 +32,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Function *ParentFunc = PreLoopBB->getParent();
LLVMContext &Ctx = PreLoopBB->getContext();
const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+ MDBuilder MDB(Ctx);
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+ StringRef Name = "MemCopyAliasScope";
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *TypeOfCopyLen = CopyLen->getType();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+ AtomicElementSize);
+ assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+ "Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+ assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
if (LoopEndCount != 0) {
@@ -68,12 +82,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
// Loop Body
Value *SrcGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
- PartSrcAlign, SrcIsVolatile);
+ LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
Value *DstGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+ StoreInst *Store = LoopBuilder.CreateAlignedStore(
+ Load, DstGEP, PartDstAlign, DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
LoopIndex->addIncoming(NewIndex, LoopBB);
@@ -93,7 +120,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
SrcAS, DstAS, SrcAlign.value(),
- DstAlign.value());
+ DstAlign.value(), AtomicElementSize);
for (auto OpTy : RemainingOps) {
Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
@@ -101,6 +128,10 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
// Calculate the new index
unsigned OperandSize = DL.getTypeStoreSize(OpTy);
+ assert(
+ (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
uint64_t GepIndex = BytesCopied / OperandSize;
assert(GepIndex * OperandSize == BytesCopied &&
"Division should have no Remainder!");
@@ -111,9 +142,13 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
- Value *Load =
+ LoadInst *Load =
RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
-
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
// Cast destination to operand type and store.
PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
Value *CastedDst = DstAddr->getType() == DstPtrType
@@ -121,8 +156,16 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(DstAddr, DstPtrType);
Value *DstGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
- RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+ StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+ DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
BytesCopied += OperandSize;
}
}
@@ -134,8 +177,9 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *SrcAddr, Value *DstAddr,
Value *CopyLen, Align SrcAlign,
Align DstAlign, bool SrcIsVolatile,
- bool DstIsVolatile,
- const TargetTransformInfo &TTI) {
+ bool DstIsVolatile, bool CanOverlap,
+ const TargetTransformInfo &TTI,
+ Optional<uint32_t> AtomicElementSize) {
BasicBlock *PreLoopBB = InsertBefore->getParent();
BasicBlock *PostLoopBB =
PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
@@ -143,12 +187,22 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Function *ParentFunc = PreLoopBB->getParent();
const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
LLVMContext &Ctx = PreLoopBB->getContext();
+ MDBuilder MDB(Ctx);
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+ StringRef Name = "MemCopyAliasScope";
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+ AtomicElementSize);
+ assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+ "Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+ assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
@@ -183,19 +237,40 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign,
- SrcIsVolatile);
+ LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
+ }
Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+ StoreInst *Store =
+ LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
LoopIndex->addIncoming(NewIndex, LoopBB);
- if (!LoopOpIsInt8) {
- // Add in the
- Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
- Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+ bool requiresResidual =
+ !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
+ if (requiresResidual) {
+ Type *ResLoopOpType = AtomicElementSize
+ ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
+ : Int8Type;
+ unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
+ assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
+ "Store size is expected to match type size");
+
+ // Add in the residual bytes (CopyLen % LoopOpSize) left over after the main loop.
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
// Loop body for the residual copy.
BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
@@ -230,21 +305,34 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
ResidualIndex->addIncoming(Zero, ResHeaderBB);
- Value *SrcAsInt8 =
- ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
- Value *DstAsInt8 =
- ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+ Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
+ SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
+ Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
+ DstAddr, PointerType::get(ResLoopOpType, DstAS));
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
- Value *SrcGEP =
- ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
- Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign,
- SrcIsVolatile);
- Value *DstGEP =
- ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
- ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
- Value *ResNewIndex =
- ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+ Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
+ ResLoopOpType, SrcAsResLoopOpType, FullOffset);
+ LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
+ Value *DstGEP = ResBuilder.CreateInBoundsGEP(
+ ResLoopOpType, DstAsResLoopOpType, FullOffset);
+ StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+ DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
+ Value *ResNewIndex = ResBuilder.CreateAdd(
+ ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
// Create the loop branch condition.
@@ -297,7 +385,13 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
Function *F = OrigBB->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
- Type *EltTy = SrcAddr->getType()->getPointerElementType();
+ // TODO: Use different element type if possible?
+ IRBuilder<> CastBuilder(InsertBefore);
+ Type *EltTy = CastBuilder.getInt8Ty();
+ Type *PtrTy =
+ CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
+ SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
+ DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);
// Create the a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
@@ -419,8 +513,21 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
NewBB);
}
+template <typename T>
+static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
+ if (SE) {
+ auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
+ auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
+ if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
+ return false;
+ }
+ return true;
+}
+
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE) {
+ bool CanOverlap = canOverlap(Memcpy, SE);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
createMemCpyLoopKnownSize(
/* InsertBefore */ Memcpy,
@@ -431,6 +538,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
+ /* CanOverlap */ CanOverlap,
/* TargetTransformInfo */ TTI);
} else {
createMemCpyLoopUnknownSize(
@@ -442,6 +550,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
+ /* CanOverlap */ CanOverlap,
/* TargetTransformInfo */ TTI);
}
}
@@ -465,3 +574,35 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
/* Alignment */ Memset->getDestAlign().valueOrOne(),
Memset->isVolatile());
}
+
+void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
+ createMemCpyLoopKnownSize(
+ /* InsertBefore */ AtomicMemcpy,
+ /* SrcAddr */ AtomicMemcpy->getRawSource(),
+ /* DstAddr */ AtomicMemcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+ /* TargetTransformInfo */ TTI,
+ /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ } else {
+ createMemCpyLoopUnknownSize(
+ /* InsertBefore */ AtomicMemcpy,
+ /* SrcAddr */ AtomicMemcpy->getRawSource(),
+ /* DstAddr */ AtomicMemcpy->getRawDest(),
+ /* CopyLen */ AtomicMemcpy->getLength(),
+ /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+ /* TargetTransformInfo */ TTI,
+ /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ }
+}