author    | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000
commit    | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree      | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
parent    | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5
vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e
vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 186
1 file changed, 113 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 6ec5590d76ba..3b90997100f1 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -49,6 +49,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -111,6 +112,7 @@ using InstrListMap = MapVector<ChainID, InstrList>;
 class Vectorizer {
   Function &F;
   AliasAnalysis &AA;
+  AssumptionCache &AC;
   DominatorTree &DT;
   ScalarEvolution &SE;
   TargetTransformInfo &TTI;
@@ -118,9 +120,9 @@ class Vectorizer {
   IRBuilder<> Builder;

 public:
-  Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
-             ScalarEvolution &SE, TargetTransformInfo &TTI)
-      : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
+  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
+             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
+      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
         DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}

   bool run();
@@ -186,7 +188,7 @@ private:

   /// Check if this load/store access is misaligned.
   bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
-                          unsigned Alignment);
+                          Align Alignment);
 };

 class LoadStoreVectorizerLegacyPass : public FunctionPass {
@@ -205,6 +207,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<ScalarEvolutionWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -219,6 +222,7 @@ char LoadStoreVectorizerLegacyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                       "Vectorize load and Store instructions", false, false)
 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
@@ -241,7 +245,10 @@ bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
   TargetTransformInfo &TTI =
       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

-  Vectorizer V(F, AA, DT, SE, TTI);
+  AssumptionCache &AC =
+      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+  Vectorizer V(F, AA, AC, DT, SE, TTI);
   return V.run();
 }

@@ -254,8 +261,9 @@ PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisMana
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
   ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

-  Vectorizer V(F, AA, DT, SE, TTI);
+  Vectorizer V(F, AA, AC, DT, SE, TTI);
   bool Changed = V.run();
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
@@ -304,8 +312,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
     return false;

   // Make sure that A and B are different pointers of the same size type.
-  Type *PtrATy = PtrA->getType()->getPointerElementType();
-  Type *PtrBTy = PtrB->getType()->getPointerElementType();
+  Type *PtrATy = getLoadStoreType(A);
+  Type *PtrBTy = getLoadStoreType(B);
   if (PtrA == PtrB || PtrATy->isVectorTy() != PtrBTy->isVectorTy() ||
       DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
@@ -376,6 +384,81 @@ bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
   return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta, Depth);
 }

+static bool checkNoWrapFlags(Instruction *I, bool Signed) {
+  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
+  return (Signed && BinOpI->hasNoSignedWrap()) ||
+         (!Signed && BinOpI->hasNoUnsignedWrap());
+}
+
+static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
+                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
+                                   unsigned MatchingOpIdxB, bool Signed) {
+  // If both OpA and OpB are adds with NSW/NUW and one of their operands is
+  // the same, we can guarantee that the transformation is safe if we can
+  // prove that OpA won't overflow when IdxDiff is added to the other operand
+  // of OpA.
+  // For example:
+  //  %tmp7 = add nsw i32 %tmp2, %v0
+  //  %tmp8 = sext i32 %tmp7 to i64
+  //  ...
+  //  %tmp11 = add nsw i32 %v0, 1
+  //  %tmp12 = add nsw i32 %tmp2, %tmp11
+  //  %tmp13 = sext i32 %tmp12 to i64
+  //
+  // Both %tmp7 and %tmp12 have the nsw flag and share the first operand,
+  // %tmp2. It's guaranteed that adding 1 to %tmp7 won't overflow because
+  // %tmp11 adds 1 to %v0 and both %tmp11 and %tmp12 have the nsw flag.
+  assert(AddOpA->getOpcode() == Instruction::Add &&
+         AddOpB->getOpcode() == Instruction::Add &&
+         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
+  if (AddOpA->getOperand(MatchingOpIdxA) ==
+      AddOpB->getOperand(MatchingOpIdxB)) {
+    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
+    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
+    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
+    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
+    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
+    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
+        checkNoWrapFlags(OtherInstrB, Signed) &&
+        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
+      int64_t CstVal =
+          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
+      if (OtherInstrB->getOperand(0) == OtherOperandA &&
+          IdxDiff.getSExtValue() == CstVal)
+        return true;
+    }
+    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
+    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
+        checkNoWrapFlags(OtherInstrA, Signed) &&
+        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
+      int64_t CstVal =
+          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
+      if (OtherInstrA->getOperand(0) == OtherOperandB &&
+          IdxDiff.getSExtValue() == -CstVal)
+        return true;
+    }
+    // Match `x +nsw/nuw (y +nsw/nuw c)` and
+    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
+    if (OtherInstrA && OtherInstrB &&
+        OtherInstrA->getOpcode() == Instruction::Add &&
+        OtherInstrB->getOpcode() == Instruction::Add &&
+        checkNoWrapFlags(OtherInstrA, Signed) &&
+        checkNoWrapFlags(OtherInstrB, Signed) &&
+        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
+        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
+      int64_t CstValA =
+          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
+      int64_t CstValB =
+          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
+      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
+          IdxDiff.getSExtValue() == (CstValB - CstValA))
+        return true;
+    }
+  }
+  return false;
+}
+
 bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
                                              APInt PtrDelta,
                                              unsigned Depth) const {
@@ -430,73 +513,30 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,

   // Now we need to prove that adding IdxDiff to ValA won't overflow.
   bool Safe = false;
-  auto CheckFlags = [](Instruction *I, bool Signed) {
-    BinaryOperator *BinOpI = cast<BinaryOperator>(I);
-    return (Signed && BinOpI->hasNoSignedWrap()) ||
-           (!Signed && BinOpI->hasNoUnsignedWrap());
-  };
   // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to
   // ValA, we're okay.
   if (OpB->getOpcode() == Instruction::Add &&
       isa<ConstantInt>(OpB->getOperand(1)) &&
       IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
-      CheckFlags(OpB, Signed))
+      checkNoWrapFlags(OpB, Signed))
     Safe = true;
-  // Second attempt: If both OpA and OpB is an add with NSW/NUW and with
-  // the same LHS operand, we can guarantee that the transformation is safe
-  // if we can prove that OpA won't overflow when IdxDiff added to the RHS
-  // of OpA.
-  // For example:
-  //  %tmp7 = add nsw i32 %tmp2, %v0
-  //  %tmp8 = sext i32 %tmp7 to i64
-  //  ...
-  //  %tmp11 = add nsw i32 %v0, 1
-  //  %tmp12 = add nsw i32 %tmp2, %tmp11
-  //  %tmp13 = sext i32 %tmp12 to i64
-  //
-  // Both %tmp7 and %tmp2 has the nsw flag and the first operand
-  // is %tmp2. It's guaranteed that adding 1 to %tmp7 won't overflow
-  // because %tmp11 adds 1 to %v0 and both %tmp11 and %tmp12 has the
-  // nsw flag.
+  // Second attempt: check if we have eligible add NSW/NUW instruction
+  // sequences.
   OpA = dyn_cast<Instruction>(ValA);
   if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
-      OpB->getOpcode() == Instruction::Add &&
-      OpA->getOperand(0) == OpB->getOperand(0) && CheckFlags(OpA, Signed) &&
-      CheckFlags(OpB, Signed)) {
-    Value *RHSA = OpA->getOperand(1);
-    Value *RHSB = OpB->getOperand(1);
-    Instruction *OpRHSA = dyn_cast<Instruction>(RHSA);
-    Instruction *OpRHSB = dyn_cast<Instruction>(RHSB);
-    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
-    if (OpRHSB && OpRHSB->getOpcode() == Instruction::Add &&
-        CheckFlags(OpRHSB, Signed) && isa<ConstantInt>(OpRHSB->getOperand(1))) {
-      int64_t CstVal = cast<ConstantInt>(OpRHSB->getOperand(1))->getSExtValue();
-      if (OpRHSB->getOperand(0) == RHSA && IdxDiff.getSExtValue() == CstVal)
-        Safe = true;
-    }
-    // Match `x +nsw/nuw (y +nsw/nuw -Idx)` and `x +nsw/nuw (y +nsw/nuw x)`.
-    if (OpRHSA && OpRHSA->getOpcode() == Instruction::Add &&
-        CheckFlags(OpRHSA, Signed) && isa<ConstantInt>(OpRHSA->getOperand(1))) {
-      int64_t CstVal = cast<ConstantInt>(OpRHSA->getOperand(1))->getSExtValue();
-      if (OpRHSA->getOperand(0) == RHSB && IdxDiff.getSExtValue() == -CstVal)
-        Safe = true;
-    }
-    // Match `x +nsw/nuw (y +nsw/nuw c)` and
-    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
-    if (OpRHSA && OpRHSB && OpRHSA->getOpcode() == Instruction::Add &&
-        OpRHSB->getOpcode() == Instruction::Add && CheckFlags(OpRHSA, Signed) &&
-        CheckFlags(OpRHSB, Signed) && isa<ConstantInt>(OpRHSA->getOperand(1)) &&
-        isa<ConstantInt>(OpRHSB->getOperand(1))) {
-      int64_t CstValA =
-          cast<ConstantInt>(OpRHSA->getOperand(1))->getSExtValue();
-      int64_t CstValB =
-          cast<ConstantInt>(OpRHSB->getOperand(1))->getSExtValue();
-      if (OpRHSA->getOperand(0) == OpRHSB->getOperand(0) &&
-          IdxDiff.getSExtValue() == (CstValB - CstValA))
-        Safe = true;
-    }
+      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
+      checkNoWrapFlags(OpB, Signed)) {
+    // In the checks below, a matching operand in OpA and OpB is an operand
+    // that is the same in both instructions. Try every possible order of the
+    // operands of these two add instructions.
+    for (unsigned MatchingOpIdxA : {0, 1})
+      for (unsigned MatchingOpIdxB : {0, 1})
+        if (!Safe)
+          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
+                                        MatchingOpIdxB, Signed);
   }

   unsigned BitWidth = ValA->getType()->getScalarSizeInBits();
@@ -506,11 +546,8 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
   // are known to be zero in ValA, we can add Diff to it while guaranteeing no
   // overflow of any sort.
   if (!Safe) {
-    OpA = dyn_cast<Instruction>(ValA);
-    if (!OpA)
-      return false;
     KnownBits Known(BitWidth);
-    computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
+    computeKnownBits(ValA, Known, DL, 0, &AC, OpB, &DT);
     APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
     if (Signed)
       BitsAllowedToBeSet.clearBit(BitWidth - 1);
@@ -670,6 +707,9 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
                cast<IntrinsicInst>(&I)->getIntrinsicID() ==
                    Intrinsic::pseudoprobe) {
       // Ignore llvm.pseudoprobe calls.
+    } else if (isa<IntrinsicInst>(&I) &&
+               cast<IntrinsicInst>(&I)->getIntrinsicID() == Intrinsic::assume) {
+      // Ignore llvm.assume calls.
     } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
       LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
                         << '\n');
@@ -1061,7 +1101,7 @@ bool Vectorizer::vectorizeStoreChain(
   InstructionsProcessed->insert(Chain.begin(), Chain.end());

   // If the store is going to be misaligned, don't vectorize it.
-  if (accessIsMisaligned(SzInBytes, AS, Alignment.value())) {
+  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
     if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
       auto Chains = splitOddVectorElts(Chain, Sz);
       return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
@@ -1206,7 +1246,7 @@ bool Vectorizer::vectorizeLoadChain(
   InstructionsProcessed->insert(Chain.begin(), Chain.end());

   // If the load is going to be misaligned, don't vectorize it.
-  if (accessIsMisaligned(SzInBytes, AS, Alignment.value())) {
+  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
     if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
       auto Chains = splitOddVectorElts(Chain, Sz);
       return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
@@ -1301,8 +1341,8 @@ bool Vectorizer::vectorizeLoadChain(
 }

 bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
-                                    unsigned Alignment) {
-  if (Alignment % SzInBytes == 0)
+                                    Align Alignment) {
+  if (Alignment.value() % SzInBytes == 0)
     return false;

   bool Fast = false;
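The functional core of this change is that the vectorizer now threads an AssumptionCache into computeKnownBits, so facts established by dominating llvm.assume calls can help prove that adding IdxDiff to an index cannot overflow. A minimal sketch of that mechanism, independent of this patch; the helper name lowBitsKnownZero is illustrative, not an LLVM API:

```cpp
// Illustrative only, not part of the patch: shows how an AssumptionCache
// lets known-bits analysis exploit dominating llvm.assume facts.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Returns true if the low NumBits of V are known to be zero at CtxI, e.g.
// because a dominating `call void @llvm.assume(i1 %cond)` recorded in AC
// constrains V.
static bool lowBitsKnownZero(const Value *V, unsigned NumBits,
                             const DataLayout &DL, AssumptionCache &AC,
                             const Instruction *CtxI,
                             const DominatorTree &DT) {
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL, /*Depth=*/0, &AC, CtxI, &DT);
  return Known.countMinTrailingZeros() >= NumBits;
}
```

Note that the patch also changes the context instruction passed to computeKnownBits from OpA to OpB, the later of the two address computations, which is where more assumptions are in scope.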
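The refactoring hoists the old CheckFlags lambda into checkNoWrapFlags and factors the three `add nsw/nuw` patterns into checkIfSafeAddSequence, which the caller now tries for all four operand orders instead of requiring the shared operand to be the LHS of both adds. For intuition, a rough PatternMatch-style rendering of the first pattern (a sketch under assumed simplifications, not the patch's actual matching code):

```cpp
// Illustrative only: the first case checkIfSafeAddSequence handles,
// A = x + y and B = x + (y + Diff). m_Add matches operands in the written
// order, which is exactly why the patch enumerates MatchingOpIdxA and
// MatchingOpIdxB over {0, 1}. The nsw/nuw flags still have to be verified
// separately, as checkNoWrapFlags does.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

static bool isSameBasePlusConst(Value *A, Value *B, int64_t Diff) {
  Value *X = nullptr, *Y = nullptr;
  const APInt *C = nullptr;
  return match(A, m_Add(m_Value(X), m_Value(Y))) &&
         match(B, m_Add(m_Specific(X), m_Add(m_Specific(Y), m_APInt(C)))) &&
         C->getSExtValue() == Diff;
}
```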
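getVectorizablePrefix also learns to step over llvm.assume calls: they are no-ops at run time, but without this branch they would end a load/store chain. A standalone sketch of the shape of that check, with a hypothetical helper name (the patch inlines it in the else-if ladder shown above):

```cpp
// Illustrative only. Calls to llvm.assume, like the existing
// llvm.pseudoprobe case, neither read nor write memory that matters to the
// chain, so the prefix scan may ignore them.
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

static bool isIgnorableIntrinsic(const Instruction &I) {
  const auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::pseudoprobe: // already ignored before this patch
  case Intrinsic::assume:      // newly ignored by this patch
    return true;
  default:
    return false;
  }
}
```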
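Finally, accessIsMisaligned now takes llvm::Align rather than a raw unsigned, and the callers stop unwrapping it with Alignment.value() at the call site. Since Align guarantees a non-zero power of two, the divisibility test is always well defined. A minimal sketch of the changed predicate in hypothetical free-function form:

```cpp
// Illustrative only: the early-out in accessIsMisaligned. Align cannot hold
// zero, so the modulo below cannot divide by zero.
#include "llvm/Support/Alignment.h"

using namespace llvm;

static bool alignmentCoversAccess(Align Alignment, unsigned SzInBytes) {
  // e.g. Align(16) with a 4-byte access: 16 % 4 == 0, so the access is
  // aligned without consulting any target hooks.
  return Alignment.value() % SzInBytes == 0;
}
```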